{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from tqdm import tqdm\n",
    "from sklearn.metrics import mean_squared_error,explained_variance_score\n",
    "from sklearn.model_selection import KFold\n",
    "import lightgbm as lgb\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Relative paths of the input CSV files used by this notebook.\n",
    "train_gps_path = 'train_data_clean.csv'\n",
    "train_data_path = 'train0711.csv'\n",
    "test_data_path = 'testB.csv'\n",
    "order_data_path = 'loadingOrderEvent.csv'\n",
    "port_data_path = 'port.csv'\n",
    "\n",
    "# Original note: take the first 1000000 rows.\n",
    "# NOTE(review): NDATA is 50,000,000 rather than 1,000,000, and debug / NDATA / skip are\n",
    "# not read by the code visible in this part of the notebook -- confirm how they are used.\n",
    "debug = True\n",
    "NDATA = 50000000\n",
    "skip = 4\n",
    "\n",
    "# Starts out empty. NOTE(review): presumably filled with a vessel-MMSI mapping later -- confirm.\n",
    "MMSI_map = {}\n",
    "\n",
    "def reduce_mem_usage(props, float_dtype=np.float32, verbose=True):\n",
    "    \"\"\"Downcast the numeric columns of ``props`` in place to smaller dtypes.\n",
    "\n",
    "    Integer columns, and float columns whose values are all finite and whole\n",
    "    numbers (summed absolute deviation below 0.01), are converted to the\n",
    "    narrowest unsigned or signed integer type that holds their min and max.\n",
    "    Remaining float columns are converted to ``float_dtype`` when it is\n",
    "    narrower than the current dtype and can hold the largest magnitude.\n",
    "    Columns that are not plain NumPy integer/float columns are left untouched.\n",
    "\n",
    "    Args:\n",
    "        props: DataFrame to shrink. Its columns are reassigned in place.\n",
    "        float_dtype: Target dtype for non-integral float columns. The default\n",
    "            is float32 because float16 keeps only about three significant\n",
    "            digits (a longitude of 114.2567 would be stored as 114.25).\n",
    "        verbose: Print per-column dtypes and the memory summary when True.\n",
    "\n",
    "    Returns:\n",
    "        The same ``props`` object after conversion.\n",
    "    \"\"\"\n",
    "    def log(*args):\n",
    "        if verbose:\n",
    "            print(*args)\n",
    "\n",
    "    # Memory footprint before conversion, in MB.\n",
    "    start_mem_usg = props.memory_usage().sum() / 1024 ** 2\n",
    "    log(\"Memory usage of the dataframe is :\", start_mem_usg, \"MB\")\n",
    "\n",
    "    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)\n",
    "    signed_types = (np.int8, np.int16, np.int32, np.int64)\n",
    "    float_dtype = np.dtype(float_dtype)\n",
    "\n",
    "    for col in props.columns:\n",
    "        col_dtype = props[col].dtype\n",
    "        # Strings, booleans, datetimes, categoricals and extension dtypes have\n",
    "        # no meaningful numeric downcast, so only NumPy int/uint/float qualify.\n",
    "        if not (isinstance(col_dtype, np.dtype) and col_dtype.kind in 'iuf'):\n",
    "            continue\n",
    "\n",
    "        log(\"**************************\")\n",
    "        log(\"columns: \", col)\n",
    "        log(\"dtype before\", col_dtype)\n",
    "\n",
    "        values = props[col].to_numpy()\n",
    "        is_float = col_dtype.kind == 'f'\n",
    "        target = None\n",
    "\n",
    "        if values.size:\n",
    "            if is_float:\n",
    "                # NaN/inf cannot live in an integer column, so a float column\n",
    "                # only counts as integral when every value is finite.\n",
    "                is_int = bool(np.isfinite(values).all()) and (\n",
    "                    np.abs(values - np.trunc(values)).sum(dtype=np.float64) < 0.01)\n",
    "            else:\n",
    "                is_int = True\n",
    "\n",
    "            if is_int:\n",
    "                # Python ints avoid overflow when comparing against type bounds.\n",
    "                mmin, mmax = int(values.min()), int(values.max())\n",
    "                for candidate in (unsigned_types if mmin >= 0 else signed_types):\n",
    "                    bounds = np.iinfo(candidate)\n",
    "                    if bounds.min <= mmin and mmax <= bounds.max:\n",
    "                        target = np.dtype(candidate)\n",
    "                        break\n",
    "\n",
    "            if target is None and is_float and float_dtype.itemsize < col_dtype.itemsize:\n",
    "                # Refuse a float downcast that would overflow to +/-inf.\n",
    "                finite = values[np.isfinite(values)]\n",
    "                if finite.size == 0 or np.abs(finite).max() <= np.finfo(float_dtype).max:\n",
    "                    target = float_dtype\n",
    "\n",
    "        if target is not None and target != col_dtype:\n",
    "            props[col] = props[col].astype(target)\n",
    "\n",
    "        log(\"dtype after\", props[col].dtype)\n",
    "        log(\"********************************\")\n",
    "\n",
    "    log(\"___MEMORY USAGE AFTER COMPLETION:___\")\n",
    "    mem_usg = props.memory_usage().sum() / 1024**2\n",
    "    log(\"Memory usage is: \", mem_usg, \" MB\")\n",
    "    log(\"This is \", 100*mem_usg/start_mem_usg, \"% of the initial size\")\n",
    "    return props\n",
    "\n",
    "# 处理数据，将时间转换为datatime格式\n",
    "def get_data(data, mode='train'):\n",
    "    \"\"\"Coerce the raw columns of ``data`` to their working dtypes, in place.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame to convert; its columns are reassigned in place.\n",
    "        mode: 'train' or 'test' parses ``timestamp`` to datetime, casts\n",
    "            ``longitude``/``latitude``/``speed`` to float and ``loadingOrder``\n",
    "            to str. 'test' additionally keeps the unparsed timestamp in\n",
    "            ``temp_timestamp`` and parses ``onboardDate``. 'port' converts\n",
    "            ``LONGITUDE``/``LATITUDE`` to float.\n",
    "\n",
    "    Returns:\n",
    "        The same ``data`` object after conversion.\n",
    "\n",
    "    Raises:\n",
    "        AssertionError: if ``mode`` is not one of the three supported values.\n",
    "    \"\"\"\n",
    "    assert mode in ('train', 'test', 'port'), 'mode must be one of: train, test, port'\n",
    "\n",
    "    if mode == 'port':\n",
    "        for col in ('LONGITUDE', 'LATITUDE'):\n",
    "            # to_numeric parses string coordinates; astype pins the result to float64.\n",
    "            data[col] = pd.to_numeric(data[col]).astype(float)\n",
    "        return data\n",
    "\n",
    "    if mode == 'test':\n",
    "        # Keep the raw timestamp before it is replaced by the parsed column.\n",
    "        data['temp_timestamp'] = data['timestamp']\n",
    "        data['onboardDate'] = pd.to_datetime(data['onboardDate'])\n",
    "    # infer_datetime_format is deprecated since pandas 2.0 (format inference\n",
    "    # is the default there), so it is no longer passed.\n",
    "    data['timestamp'] = pd.to_datetime(data['timestamp'])\n",
    "    data['longitude'] = data['longitude'].astype(float)\n",
    "    data['loadingOrder'] = data['loadingOrder'].astype(str)\n",
    "    data['latitude'] = data['latitude'].astype(float)\n",
    "    data['speed'] = data['speed'].astype(float)\n",
    "    return data\n",
    "# Run a full garbage-collection pass. As the cell's last expression, the return value of\n",
    "# gc.collect() (the number of unreachable objects it found) is shown as the cell output.\n",
    "import gc \n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "0it [00:00, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516845703125 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.006591796875  MB\n",
      "This is  64.06250718749857 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "1it [00:17, 17.96s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "2it [00:35, 17.84s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "3it [00:55, 18.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "4it [01:17, 19.42s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "5it [01:41, 20.92s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "6it [02:09, 22.98s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "7it [02:39, 25.21s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "8it [03:13, 27.66s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "9it [03:49, 30.26s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "10it [04:28, 32.83s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "11it [05:08, 35.13s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "12it [05:52, 37.70s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "13it [06:44, 42.10s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "14it [07:34, 44.30s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 610.3516883850098 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  391.00659561157227  MB\n",
      "This is  64.06250741210785 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "15it [08:25, 46.27s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of the dataframe is : 118.93652725219727 MB\n",
      "**************************\n",
      "columns:  longitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  latitude\n",
      "dtype before float64\n",
      "dtype after float16\n",
      "********************************\n",
      "**************************\n",
      "columns:  speed\n",
      "dtype before int64\n",
      "dtype after uint8\n",
      "********************************\n",
      "**************************\n",
      "columns:  direction\n",
      "dtype before int64\n",
      "dtype after int32\n",
      "********************************\n",
      "___MEMORY USAGE AFTER COMPLETION:___\n",
      "Memory usage is:  76.19375801086426  MB\n",
      "This is  64.06253803703238 % of the initial size\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "16it [09:07, 34.21s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "119568756\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5738029</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:12:59+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12670</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5739749</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:22:38+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14790</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5740997</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:30:55+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21510</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5741931</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:37:35+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19900</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5743324</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:45:56+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21360</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48755414</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:00:39+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35420</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48756216</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:01:10+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35320</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48757112</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:01:50+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34810</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48758282</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:02:30+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34710</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48759239</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:03:11+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34610</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>119568756 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            loadingOrder                 timestamp  longitude   latitude  \\\n",
       "5738029   AA191175561416 2019-01-28 16:12:59+00:00     114.25  22.578125   \n",
       "5739749   AA191175561416 2019-01-28 16:22:38+00:00     114.25  22.578125   \n",
       "5740997   AA191175561416 2019-01-28 16:30:55+00:00     114.25  22.578125   \n",
       "5741931   AA191175561416 2019-01-28 16:37:35+00:00     114.25  22.578125   \n",
       "5743324   AA191175561416 2019-01-28 16:45:56+00:00     114.25  22.578125   \n",
       "...                  ...                       ...        ...        ...   \n",
       "48755414  ZZ992682575994 2019-03-11 16:00:39+00:00     101.25   2.796875   \n",
       "48756216  ZZ992682575994 2019-03-11 16:01:10+00:00     101.25   2.796875   \n",
       "48757112  ZZ992682575994 2019-03-11 16:01:50+00:00     101.25   2.796875   \n",
       "48758282  ZZ992682575994 2019-03-11 16:02:30+00:00     101.25   2.796875   \n",
       "48759239  ZZ992682575994 2019-03-11 16:03:11+00:00     101.25   2.796875   \n",
       "\n",
       "           vesselMMSI  speed direction TRANSPORT_TRACE  \n",
       "5738029   Y7540547327    0.0     12670     CNYTN-MXZLO  \n",
       "5739749   Y7540547327    0.0     14790     CNYTN-MXZLO  \n",
       "5740997   Y7540547327    0.0     21510     CNYTN-MXZLO  \n",
       "5741931   Y7540547327    0.0     19900     CNYTN-MXZLO  \n",
       "5743324   Y7540547327    0.0     21360     CNYTN-MXZLO  \n",
       "...               ...    ...       ...             ...  \n",
       "48755414  V8626322848    2.0     35420     CNSHK-MYPKE  \n",
       "48756216  V8626322848    2.0     35320     CNSHK-MYPKE  \n",
       "48757112  V8626322848    2.0     34810     CNSHK-MYPKE  \n",
       "48758282  V8626322848    2.0     34710     CNSHK-MYPKE  \n",
       "48759239  V8626322848    2.0     34610     CNSHK-MYPKE  \n",
       "\n",
       "[119568756 rows x 8 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the cleaned GPS training data, downcast it, and normalise its column dtypes.\n",
    "names = ['loadingOrder', 'timestamp', 'longitude', 'latitude', 'vesselMMSI', 'speed', 'direction', 'TRANSPORT_TRACE']\n",
    "chunksize = True  # True: stream the whole file in chunks; False: load only a 100000-row sample\n",
    "if chunksize:\n",
    "    # Alternative source kept from the original notebook (raw file, selected columns):\n",
    "    # train_flux = pd.read_csv(train_data_path, usecols = [0, 2, 3, 4, 5, 6, 7, 12], names = names, chunksize = 10000000)\n",
    "    train_flux = pd.read_csv(train_gps_path, chunksize = 10000000)\n",
    "    # Downcast each chunk as it arrives and concatenate once at the end.\n",
    "    # DataFrame.append inside the loop re-copied the accumulated frame on every\n",
    "    # iteration and no longer exists in pandas >= 2.0.\n",
    "    chunks = []\n",
    "    for data in tqdm(train_flux):\n",
    "        chunks.append(reduce_mem_usage(data))\n",
    "    # The row index of each chunk is preserved, as it was with append.\n",
    "    train_data = pd.concat(chunks) if chunks else pd.DataFrame(columns = names)\n",
    "    del chunks\n",
    "else:\n",
    "    train_data = pd.read_csv(train_gps_path, nrows = 100000)\n",
    "    train_data = reduce_mem_usage(train_data)\n",
    "\n",
    "print(train_data.shape[0])\n",
    "\n",
    "# Optional cleaning steps, disabled in the original notebook:\n",
    "# train_data.drop_duplicates(subset = ['loadingOrder', 'timestamp', 'longitude', 'latitude'], keep = 'first', inplace = True)  # duplicate GPS rows\n",
    "# train_data = train_data[~train_data['TRANSPORT_TRACE'].isin([np.nan])]  # rows without trace information\n",
    "# train_data = train_data[train_data['TRANSPORT_TRACE'].str.split('-').str.len() >= 2]  # traces with fewer than 2 parts\n",
    "# train_data.sort_values(['loadingOrder', 'timestamp'], inplace = True)\n",
    "\n",
    "train_data = get_data(train_data, mode = 'train')\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>diff_time</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:12:59+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12670</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:22:38+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14790</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>9.650000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:30:55+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21510</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>8.283333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:37:35+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19900</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>6.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:45:56+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21360</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>8.350000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119568751</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:00:39+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35420</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.466667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119568752</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:01:10+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35320</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.516667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119568753</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:01:50+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34810</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119568754</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:02:30+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34710</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119568755</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:03:11+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34610</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.683333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>119568756 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             loadingOrder                 timestamp  longitude   latitude  \\\n",
       "0          AA191175561416 2019-01-28 16:12:59+00:00     114.25  22.578125   \n",
       "1          AA191175561416 2019-01-28 16:22:38+00:00     114.25  22.578125   \n",
       "2          AA191175561416 2019-01-28 16:30:55+00:00     114.25  22.578125   \n",
       "3          AA191175561416 2019-01-28 16:37:35+00:00     114.25  22.578125   \n",
       "4          AA191175561416 2019-01-28 16:45:56+00:00     114.25  22.578125   \n",
       "...                   ...                       ...        ...        ...   \n",
       "119568751  ZZ992682575994 2019-03-11 16:00:39+00:00     101.25   2.796875   \n",
       "119568752  ZZ992682575994 2019-03-11 16:01:10+00:00     101.25   2.796875   \n",
       "119568753  ZZ992682575994 2019-03-11 16:01:50+00:00     101.25   2.796875   \n",
       "119568754  ZZ992682575994 2019-03-11 16:02:30+00:00     101.25   2.796875   \n",
       "119568755  ZZ992682575994 2019-03-11 16:03:11+00:00     101.25   2.796875   \n",
       "\n",
       "            vesselMMSI  speed  direction TRANSPORT_TRACE  diff_time  diff_lat  \\\n",
       "0          Y7540547327    0.0      12670     CNYTN-MXZLO   0.000000       0.0   \n",
       "1          Y7540547327    0.0      14790     CNYTN-MXZLO   9.650000       0.0   \n",
       "2          Y7540547327    0.0      21510     CNYTN-MXZLO   8.283333       0.0   \n",
       "3          Y7540547327    0.0      19900     CNYTN-MXZLO   6.666667       0.0   \n",
       "4          Y7540547327    0.0      21360     CNYTN-MXZLO   8.350000       0.0   \n",
       "...                ...    ...        ...             ...        ...       ...   \n",
       "119568751  V8626322848    2.0      35420     CNSHK-MYPKE   0.466667       0.0   \n",
       "119568752  V8626322848    2.0      35320     CNSHK-MYPKE   0.516667       0.0   \n",
       "119568753  V8626322848    2.0      34810     CNSHK-MYPKE   0.666667       0.0   \n",
       "119568754  V8626322848    2.0      34710     CNSHK-MYPKE   0.666667       0.0   \n",
       "119568755  V8626322848    2.0      34610     CNSHK-MYPKE   0.683333       0.0   \n",
       "\n",
       "           diff_lon  \n",
       "0               0.0  \n",
       "1               0.0  \n",
       "2               0.0  \n",
       "3               0.0  \n",
       "4               0.0  \n",
       "...             ...  \n",
       "119568751       0.0  \n",
       "119568752       0.0  \n",
       "119568753       0.0  \n",
       "119568754       0.0  \n",
       "119568755       0.0  \n",
       "\n",
       "[119568756 rows x 11 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#train_data['diff_time'] = train_data.groupby('loadingOrder')['timestamp'].diff(1).dt.total_seconds() / 60\n",
    "#train_data['diff_lat'] = train_data.groupby('loadingOrder')['latitude'].diff(1)\n",
    "#train_data['diff_lon'] = train_data.groupby('loadingOrder')['longitude'].diff(1)\n",
    "#train_data.fillna(0, inplace = True)\n",
    "#train_data.reset_index(inplace = True, drop = True)\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NJ169522947117</td>\n",
       "      <td>CNSHK-SGSIN</td>\n",
       "      <td>110.760670</td>\n",
       "      <td>12.794997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SX540070026140</td>\n",
       "      <td>CNSHK-MYTPP</td>\n",
       "      <td>110.662020</td>\n",
       "      <td>12.281523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ZV919459607351</td>\n",
       "      <td>CNSHK-BHBAH</td>\n",
       "      <td>66.776400</td>\n",
       "      <td>17.426293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AE378244933121</td>\n",
       "      <td>CNSHK-EGPSD</td>\n",
       "      <td>71.262752</td>\n",
       "      <td>7.878960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>FQ684650477699</td>\n",
       "      <td>CNYTN-BRSSZ</td>\n",
       "      <td>105.383015</td>\n",
       "      <td>3.054528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>234</th>\n",
       "      <td>AM438554857021</td>\n",
       "      <td>CNYTN-PAONX</td>\n",
       "      <td>114.336137</td>\n",
       "      <td>22.557467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>JY538030984696</td>\n",
       "      <td>CNYTN-PAONX</td>\n",
       "      <td>115.326340</td>\n",
       "      <td>22.234330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>236</th>\n",
       "      <td>HM477496559738</td>\n",
       "      <td>CNSHK-KRINC</td>\n",
       "      <td>121.177048</td>\n",
       "      <td>26.465133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237</th>\n",
       "      <td>XH403100738321</td>\n",
       "      <td>CNSHK-SGSIN-AEJEA</td>\n",
       "      <td>113.898995</td>\n",
       "      <td>22.385950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>238</th>\n",
       "      <td>OY547556724291</td>\n",
       "      <td>CNDCB-SGSIN</td>\n",
       "      <td>113.851917</td>\n",
       "      <td>22.531967</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>239 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       loadingOrder    TRANSPORT_TRACE   longitude   latitude\n",
       "0    NJ169522947117        CNSHK-SGSIN  110.760670  12.794997\n",
       "1    SX540070026140        CNSHK-MYTPP  110.662020  12.281523\n",
       "2    ZV919459607351        CNSHK-BHBAH   66.776400  17.426293\n",
       "3    AE378244933121        CNSHK-EGPSD   71.262752   7.878960\n",
       "4    FQ684650477699        CNYTN-BRSSZ  105.383015   3.054528\n",
       "..              ...                ...         ...        ...\n",
       "234  AM438554857021        CNYTN-PAONX  114.336137  22.557467\n",
       "235  JY538030984696        CNYTN-PAONX  115.326340  22.234330\n",
       "236  HM477496559738        CNSHK-KRINC  121.177048  26.465133\n",
       "237  XH403100738321  CNSHK-SGSIN-AEJEA  113.898995  22.385950\n",
       "238  OY547556724291        CNDCB-SGSIN  113.851917  22.531967\n",
       "\n",
       "[239 rows x 4 columns]"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 求出测试数据中所有的港口信息\n",
    "# 读取测试数据\n",
    "test_data = pd.read_csv(test_data_path)\n",
    "test_data = get_data(test_data, mode = 'test')\n",
    "# 获取测试数据的港口信息\n",
    "test_trace = pd.DataFrame()\n",
    "test_trace[['loadingOrder', 'TRANSPORT_TRACE', 'longitude', 'latitude']] = test_data[['loadingOrder', 'TRANSPORT_TRACE', 'longitude', 'latitude']]\n",
    "test_trace.drop_duplicates(subset = ['loadingOrder', 'TRANSPORT_TRACE'], keep = 'first', inplace = True)\n",
    "test_trace.reset_index(inplace = True)\n",
    "del test_trace['index']\n",
    "test_trace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1724\n",
      "3004\n",
      "2172\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2419"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 删除中间跳跃较大的数据\n",
    "del_order = []\n",
    "\n",
    "del_order3 = np.array(train_data[train_data['speed'] >= 65]['loadingOrder'])\n",
    "del_order3 = set(del_order3)\n",
    "del_order3 = list(del_order3)\n",
    "print(len(del_order3))\n",
    "\n",
    "\n",
    "del_order2 = np.array(train_data[abs(train_data['diff_lon']) + abs(train_data['diff_lat']) >= 35]['loadingOrder'])\n",
    "del_order2 = set(del_order2)\n",
    "del_order2 = list(del_order2)\n",
    "print(len(del_order2))\n",
    "    \n",
    "del_order1 = np.array(train_data[abs(train_data['diff_lon']) > 180]['loadingOrder'])\n",
    "del_order1 = set(del_order1)\n",
    "del_order1 = list(del_order1)\n",
    "print(len(del_order1))\n",
    "\n",
    "for i in range(len(del_order2)):\n",
    "    if del_order2[i] not in del_order1:\n",
    "        del_order.append(del_order2[i])\n",
    "\n",
    "for i in range(len(del_order3)):\n",
    "    del_order.append(del_order3[i])\n",
    "#train_back = train_data[train_data['diff_time'] >= 14400]\n",
    "#for i in train_back.index:\n",
    "#    del_order.append(train_back.loc[i, 'loadingOrder'])\n",
    "del_order = set(del_order)\n",
    "del_order = list(del_order)\n",
    "len(del_order)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>diff_time</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:12:59+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12670</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:22:38+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14790</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>9.650000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:30:55+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21510</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>8.283333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:37:35+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19900</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>6.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AA191175561416</td>\n",
       "      <td>2019-01-28 16:45:56+00:00</td>\n",
       "      <td>114.25</td>\n",
       "      <td>22.578125</td>\n",
       "      <td>Y7540547327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21360</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>8.350000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101670888</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:00:39+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35420</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.466667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101670889</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:01:10+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35320</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.516667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101670890</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:01:50+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34810</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101670891</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:02:30+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34710</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101670892</th>\n",
       "      <td>ZZ992682575994</td>\n",
       "      <td>2019-03-11 16:03:11+00:00</td>\n",
       "      <td>101.25</td>\n",
       "      <td>2.796875</td>\n",
       "      <td>V8626322848</td>\n",
       "      <td>2.0</td>\n",
       "      <td>34610</td>\n",
       "      <td>CNSHK-MYPKE</td>\n",
       "      <td>0.683333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>101670893 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             loadingOrder                 timestamp  longitude   latitude  \\\n",
       "0          AA191175561416 2019-01-28 16:12:59+00:00     114.25  22.578125   \n",
       "1          AA191175561416 2019-01-28 16:22:38+00:00     114.25  22.578125   \n",
       "2          AA191175561416 2019-01-28 16:30:55+00:00     114.25  22.578125   \n",
       "3          AA191175561416 2019-01-28 16:37:35+00:00     114.25  22.578125   \n",
       "4          AA191175561416 2019-01-28 16:45:56+00:00     114.25  22.578125   \n",
       "...                   ...                       ...        ...        ...   \n",
       "101670888  ZZ992682575994 2019-03-11 16:00:39+00:00     101.25   2.796875   \n",
       "101670889  ZZ992682575994 2019-03-11 16:01:10+00:00     101.25   2.796875   \n",
       "101670890  ZZ992682575994 2019-03-11 16:01:50+00:00     101.25   2.796875   \n",
       "101670891  ZZ992682575994 2019-03-11 16:02:30+00:00     101.25   2.796875   \n",
       "101670892  ZZ992682575994 2019-03-11 16:03:11+00:00     101.25   2.796875   \n",
       "\n",
       "            vesselMMSI  speed  direction TRANSPORT_TRACE  diff_time  diff_lat  \\\n",
       "0          Y7540547327    0.0      12670     CNYTN-MXZLO   0.000000       0.0   \n",
       "1          Y7540547327    0.0      14790     CNYTN-MXZLO   9.650000       0.0   \n",
       "2          Y7540547327    0.0      21510     CNYTN-MXZLO   8.283333       0.0   \n",
       "3          Y7540547327    0.0      19900     CNYTN-MXZLO   6.666667       0.0   \n",
       "4          Y7540547327    0.0      21360     CNYTN-MXZLO   8.350000       0.0   \n",
       "...                ...    ...        ...             ...        ...       ...   \n",
       "101670888  V8626322848    2.0      35420     CNSHK-MYPKE   0.466667       0.0   \n",
       "101670889  V8626322848    2.0      35320     CNSHK-MYPKE   0.516667       0.0   \n",
       "101670890  V8626322848    2.0      34810     CNSHK-MYPKE   0.666667       0.0   \n",
       "101670891  V8626322848    2.0      34710     CNSHK-MYPKE   0.666667       0.0   \n",
       "101670892  V8626322848    2.0      34610     CNSHK-MYPKE   0.683333       0.0   \n",
       "\n",
       "           diff_lon  \n",
       "0               0.0  \n",
       "1               0.0  \n",
       "2               0.0  \n",
       "3               0.0  \n",
       "4               0.0  \n",
       "...             ...  \n",
       "101670888       0.0  \n",
       "101670889       0.0  \n",
       "101670890       0.0  \n",
       "101670891       0.0  \n",
       "101670892       0.0  \n",
       "\n",
       "[101670893 rows x 11 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data = train_data[~train_data['loadingOrder'].isin(del_order)]\n",
    "train_data.reset_index(inplace = True, drop = True)\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_label = np.zeros(len(test_trace))\n",
    "from geopy.distance import distance\n",
    "indexes = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "np.save('8_7_A_index.npy', indexes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "start matching!!!\n",
      "2\n",
      "['CNSHK', 'BHBAH'] 17.426292999999998 66.7764 26.135392 50.618590000000005\n",
      "中间港到达\n",
      "cur_label: 10\n",
      "253.24916666666667 114.84027777777777\n",
      "Matching number:  8 214.69600694444446 1926.61046864011\n",
      "\n",
      "\n",
      "3\n",
      "['CNSHK', 'EGPSD'] 7.87896 71.262752 31.265289000000003 32.301866\n",
      "中间港到达\n",
      "cur_label: 6\n",
      "204.42055555555555 170.13222222222223\n",
      "Matching number:  6 186.01099537037038 4797.509270376551\n",
      "\n",
      "\n",
      "4\n",
      "['CNYTN', 'BRSSZ'] 3.054528 105.38301499999999 -23.954513000000002 -46.28402\n",
      "cur_label: 164\n",
      "717.1077777777778 511.265\n",
      "Matching number:  162 588.4013040123457 16199.644655948352\n",
      "\n",
      "\n",
      "5\n",
      "['CNNSA', 'MYTPP', 'SGSIN', 'ZACPT', 'CGPNR', 'GALBV', 'CMKBI'] -22.893365 14.49949 2.939002 9.906216\n",
      "中间港到达\n",
      "cur_label: 20\n",
      "697.32 230.47333333333333\n",
      "Matching number:  19 427.19412280701755 2900.940755578093\n",
      "\n",
      "\n",
      "6\n",
      "['CNSHK', 'SGSIN', 'MYPKG'] 8.258532 108.09176699999999 3.034709 101.361204\n",
      "中间港到达\n",
      "cur_label: 31\n",
      "123.7025 45.80388888888889\n",
      "Matching number:  31 75.42032258064518 943.0168335283284\n",
      "\n",
      "\n",
      "7\n",
      "['CNYTN', 'SGSIN', 'EGSUZ', 'GRPIR', 'ITSPE', 'ITGOA'] 27.402341999999997 34.213678 44.40565 8.946256\n",
      "cur_label: 13\n",
      "218.61944444444444 200.19333333333333\n",
      "Matching number:  13 201.99423076923077 2938.3798773417943\n",
      "\n",
      "\n",
      "8\n",
      "['CNYTN', 'BDCGP'] 22.569772 114.265817 22.304439000000002 91.79332099999999\n",
      "cur_label: 24\n",
      "442.5227777777778 254.32722222222222\n",
      "Matching number:  22 337.120321969697 2311.3748186032267\n",
      "\n",
      "\n",
      "9\n",
      "['CNSHK', 'MYTPP', 'MUPLU', 'ZADUR'] 21.191553 114.07448000000001 -29.868304 31.050079999999998\n",
      "中间港到达\n",
      "cur_label: 11\n",
      "702.7552777777778 444.95972222222224\n",
      "Matching number:  10 527.9822916666667 10524.518134275473\n",
      "\n",
      "\n",
      "10\n",
      "['CNYTN', 'MXZLO'] 22.56123 114.304023 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13784.794292787243\n",
      "\n",
      "\n",
      "11\n",
      "['CNYTN', 'MXZLO'] 22.56261 114.295138 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13785.333329658666\n",
      "\n",
      "\n",
      "12\n",
      "['CNSHK', 'BHBAH'] 22.44455 113.884267 26.135392 50.618590000000005\n",
      "中间港到达\n",
      "cur_label: 20\n",
      "601.9211111111111 383.835\n",
      "Matching number:  17 483.5931862745098 6372.9213533529755\n",
      "\n",
      "\n",
      "13\n",
      "['CNSHK', 'THLCH'] 22.4458 113.88893300000001 13.079153 100.88608\n",
      "cur_label: 162\n",
      "170.23388888888888 82.88805555555555\n",
      "Matching number:  160 109.32891145833334 1723.3136187024231\n",
      "\n",
      "\n",
      "14\n",
      "['CNYTN', 'MXZLO'] 22.568727 114.26636299999998 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13786.949361273077\n",
      "\n",
      "\n",
      "15\n",
      "['CNYTN', 'SGSIN', 'EGSUZ', 'GRPIR', 'ITSPE', 'ITGOA'] 41.309492 2.149477 44.40565 8.946256\n",
      "中间港到达\n",
      "cur_label: 7\n",
      "253.81583333333333 36.026944444444446\n",
      "Matching number:  6 52.37759259259259 653.0237552328438\n",
      "\n",
      "\n",
      "16\n",
      "['CNSHK', 'IDJKT'] 22.462007 113.89826299999999 -6.119484 106.881179\n",
      "cur_label: 442\n",
      "201.46694444444444 98.83416666666666\n",
      "Matching number:  429 118.40747215747217 3252.804963798543\n",
      "\n",
      "\n",
      "17\n",
      "['CNYTN', 'NZAKL'] 22.561515 114.257737 -36.844873 174.78561399999998\n",
      "cur_label: 9\n",
      "419.12416666666667 321.6722222222222\n",
      "Matching number:  8 346.5137152777778 9138.652800409749\n",
      "\n",
      "\n",
      "18\n",
      "['CNDCB', 'SGSIN'] 22.531132999999997 113.8512 1.3031409999999999 103.70461999999999\n",
      "cur_label: 34\n",
      "92.95 77.91638888888889\n",
      "Matching number:  34 82.88633578431373 2592.552296787444\n",
      "\n",
      "\n",
      "19\n",
      "['CNYTN', 'MXZLO'] 22.531 114.38083300000001 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13781.559007079126\n",
      "\n",
      "\n",
      "20\n",
      "['CNSHK', 'KRINC'] 22.958763 117.65629799999999 37.401819 126.626175\n",
      "中间港到达\n",
      "cur_label: 12\n",
      "74.36333333333333 53.32972222222222\n",
      "Matching number:  12 65.84629629629629 1817.0252673109721\n",
      "\n",
      "\n",
      "21\n",
      "['CNNSA', 'SGSIN', 'AEJEA'] 22.63745 113.68783 25.022073000000002 55.04979\n",
      "cur_label: 29\n",
      "345.6769444444444 311.65277777777777\n",
      "Matching number:  29 315.7523898467433 5932.990957652719\n",
      "\n",
      "\n",
      "22\n",
      "['CNNSA', 'SGSIN', 'AEJEA'] 17.426292999999998 66.7764 25.022073000000002 55.04979\n",
      "cur_label: 43\n",
      "72.70111111111112 57.022777777777776\n",
      "Matching number:  43 61.98288759689923 1478.3478721053748\n",
      "\n",
      "\n",
      "23\n",
      "['CNSHK', 'MYPKG'] 4.9926330000000005 106.595033 3.034709 101.361204\n",
      "cur_label: 106\n",
      "70.30083333333333 27.148055555555555\n",
      "Matching number:  106 33.35751572327044 620.196241313892\n",
      "\n",
      "\n",
      "24\n",
      "['CNSHK', 'SGSIN'] 22.356333 114.072667 1.3031409999999999 103.70461999999999\n",
      "cur_label: 114\n",
      "214.9 72.23805555555556\n",
      "Matching number:  113 88.3001941986234 2585.5547603337463\n",
      "\n",
      "\n",
      "25\n",
      "['CNYTN', 'HRRIJ'] 1.268815 103.783957 45.342132 14.427023000000002\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 137\n",
      "877.5455555555556 477.99583333333334\n",
      "Matching number:  135 573.4464526748972 9860.260980037076\n",
      "\n",
      "\n",
      "26\n",
      "['CNYTN', 'BRSSZ'] 2.952038 100.876537 -23.954513000000002 -46.28402\n",
      "cur_label: 219\n",
      "670.4941666666666 459.4977777777778\n",
      "Matching number:  217 547.9723438300051 15799.593174269323\n",
      "\n",
      "\n",
      "27\n",
      "['CNSHK', 'MYTPP'] 11.837765 110.86951499999999 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 26\n",
      "140.66222222222223 44.602222222222224\n",
      "Matching number:  25 75.32667222222221 1409.5048201446734\n",
      "\n",
      "\n",
      "28\n",
      "['CNSHK', 'SGSIN', 'MYPKG', 'INMUN', 'PKKHI', 'PKBQM'] 14.31184 73.037198 24.774487 67.333229\n",
      "cur_label: 44\n",
      "155.6725 98.86361111111111\n",
      "Matching number:  44 114.97358585858584 1303.2321489664325\n",
      "\n",
      "\n",
      "29\n",
      "['CNSHK', 'JPTYO'] 22.455612 113.897398 35.616509 139.792252\n",
      "cur_label: 42\n",
      "191.51694444444445 105.89472222222223\n",
      "Matching number:  41 124.37458672086723 2899.9726409054833\n",
      "\n",
      "\n",
      "30\n",
      "['CNSHK', 'KRINC'] 22.45867 113.875772 37.401819 126.626175\n",
      "cur_label: 9\n",
      "100.69055555555556 63.8825\n",
      "Matching number:  9 82.28078703703704 2059.338832216044\n",
      "\n",
      "\n",
      "31\n",
      "['CNSHK', 'SGSIN'] 22.44736 113.89249699999999 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2586.1529407549756\n",
      "\n",
      "\n",
      "32\n",
      "['CNSHK', 'HKHKG', 'TWKHH', 'CNNBG', 'CNSHA', 'CNTAO', 'KRPUS', 'MXZLO', 'PABLB', 'PAMIT', 'COCTG', 'JMKIN', 'DOCAU'] 9.257433 -79.91805 18.425821 -69.638318\n",
      "中间港到达\n",
      "cur_label: 5\n",
      "302.2963888888889 177.17944444444444\n",
      "Matching number:  4 197.07125 1503.5677929574933\n",
      "\n",
      "\n",
      "33\n",
      "['CNYTN', 'CAVAN'] 25.360020000000002 120.37988700000001 49.312003999999995 -123.103178\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 14\n",
      "476.12611111111113 423.6116666666667\n",
      "Matching number:  14 434.88926587301586 9632.369518986285\n",
      "\n",
      "\n",
      "34\n",
      "['CNSHK', 'MYPKG'] 22.462887 113.87438300000001 3.034709 101.361204\n",
      "cur_label: 258\n",
      "147.54861111111111 85.88555555555556\n",
      "Matching number:  258 101.93452088716623 2538.9010378803437\n",
      "\n",
      "\n",
      "35\n",
      "['CNSHK', 'SGSIN', 'MTMLA', 'DZALG'] 35.821509999999996 14.534182000000001 36.768454999999996 3.064607\n",
      "cur_label: 11\n",
      "566.5955555555555 109.04472222222222\n",
      "Matching number:  8 197.51916666666665 1034.9870115336334\n",
      "\n",
      "\n",
      "36\n",
      "['CNSHK', 'SGSIN'] 22.459393 113.879148 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2586.728632568627\n",
      "\n",
      "\n",
      "37\n",
      "['CNYTN', 'MXZLO'] 22.560378 114.323558 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13783.438152114319\n",
      "\n",
      "\n",
      "38\n",
      "['CNSHK', 'KRINC'] 22.278288 115.52171799999999 37.401819 126.626175\n",
      "cur_label: 8\n",
      "93.38222222222223 57.69166666666667\n",
      "Matching number:  8 74.60234374999999 1986.9610438266332\n",
      "\n",
      "\n",
      "39\n",
      "['CNYTN', 'PHBTG'] 22.566851999999997 114.265592 13.756824 121.046856\n",
      "cur_label: 67\n",
      "117.36111111111111 45.6975\n",
      "Matching number:  67 75.00896766169154 1210.0836600591397\n",
      "\n",
      "\n",
      "40\n",
      "['CNYTN', 'MXZLO'] 22.559848000000002 114.316903 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13783.9643312522\n",
      "\n",
      "\n",
      "41\n",
      "['CNSHK', 'MYTPP'] 12.281523 110.66202 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 10\n",
      "480.77194444444444 58.60861111111111\n",
      "Matching number:  9 72.66797839506174 1437.0792866103125\n",
      "\n",
      "\n",
      "42\n",
      "['CNSHK', 'INNSA', 'LKCMB', 'AEJEA', 'AEAUH', 'KWKWI'] 5.321225 97.78716800000001 29.045214 48.154646\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 20\n",
      "520.5272222222222 284.19\n",
      "Matching number:  18 426.56581018518517 5838.524355362599\n",
      "\n",
      "\n",
      "43\n",
      "['CNSHK', 'INNSA'] 2.04105 102.17605 18.936253 72.93839\n",
      "cur_label: 355\n",
      "246.32694444444445 144.64833333333334\n",
      "Matching number:  349 187.1288164597262 3694.2841058750855\n",
      "\n",
      "\n",
      "44\n",
      "['SGSIN', 'GRPIR'] 1.268815 103.783957 37.971821999999996 23.616167\n",
      "中间港到达\n",
      "cur_label: 56\n",
      "792.2938888888889 344.06194444444446\n",
      "Matching number:  52 384.6695299145299 9063.238361622645\n",
      "\n",
      "\n",
      "45\n",
      "['CNYTN', 'MXZLO'] 22.301262 114.64707 19.085960999999998 -104.305571\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cur_label: 33\n",
      "601.7263888888889 528.8261111111111\n",
      "Matching number:  33 575.9356691919191 13779.959851347901\n",
      "\n",
      "\n",
      "46\n",
      "['CNSHK', 'EGPSD'] 7.87896 71.262752 31.265289000000003 32.301866\n",
      "中间港到达\n",
      "cur_label: 6\n",
      "204.42055555555555 170.13222222222223\n",
      "Matching number:  6 186.01099537037038 4797.509270376551\n",
      "\n",
      "\n",
      "47\n",
      "['CNSHK', 'MYTPP'] 22.454883 113.87615 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 244\n",
      "540.5613888888889 82.23388888888888\n",
      "Matching number:  243 148.65499942844082 2583.863399022993\n",
      "\n",
      "\n",
      "48\n",
      "['CNYTN', 'MXZLO'] 22.563172 114.27218500000001 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13786.960288572132\n",
      "\n",
      "\n",
      "49\n",
      "['CNSHK', 'CLVAP'] 22.488342000000003 113.865307 -33.030843 -71.642993\n",
      "cur_label: 9\n",
      "716.0797222222222 662.6594444444445\n",
      "Matching number:  9 681.2486574074073 18720.703865296655\n",
      "\n",
      "\n",
      "50\n",
      "['CNSHK', 'BHBAH'] 1.23445 103.705062 26.135392 50.618590000000005\n",
      "cur_label: 5\n",
      "505.96 305.72805555555556\n",
      "Matching number:  4 361.3257986111111 6309.182019675142\n",
      "\n",
      "\n",
      "51\n",
      "['CNSHK', 'SGSIN'] 21.683868 114.317857 1.3031409999999999 103.70461999999999\n",
      "cur_label: 92\n",
      "211.11055555555555 68.82027777777778\n",
      "Matching number:  91 85.61128815628817 2531.489458862296\n",
      "\n",
      "\n",
      "52\n",
      "['CNNSA', 'SGSIN', 'AEJEA'] 1.266725 103.78256999999999 25.022073000000002 55.04979\n",
      "cur_label: 57\n",
      "268.4086111111111 211.32638888888889\n",
      "Matching number:  57 239.32643762183235 5853.223308656438\n",
      "\n",
      "\n",
      "53\n",
      "['CNSHK', 'CLVAP'] 21.876967 115.01946699999999 -33.030843 -71.642993\n",
      "cur_label: 7\n",
      "697.7522222222223 628.4397222222223\n",
      "Matching number:  7 654.6214484126983 18609.964759162376\n",
      "\n",
      "\n",
      "54\n",
      "['CNYTN', 'ARENA'] 22.492365 114.436497 -34.627862 -58.35677\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "845.2141666666666 833.4722222222222\n",
      "Matching number:  9 842.2890895061729 18491.875417762672\n",
      "\n",
      "\n",
      "55\n",
      "['SIKOP', 'HKHKG'] 9.691213000000001 109.642742 22.419915 114.13970900000001\n",
      "cur_label: 14\n",
      "79.25166666666667 42.233333333333334\n",
      "Matching number:  14 67.67049603174604 1488.1207454721787\n",
      "\n",
      "\n",
      "56\n",
      "['CNSHK', 'SGSIN'] 13.093067999999999 111.535923 1.3031409999999999 103.70461999999999\n",
      "cur_label: 33\n",
      "178.815 42.208888888888886\n",
      "Matching number:  32 57.72730902777778 1563.8281968532983\n",
      "\n",
      "\n",
      "57\n",
      "['CNSHK', 'SGSIN'] 22.346472 114.07631299999998 1.3031409999999999 103.70461999999999\n",
      "cur_label: 114\n",
      "214.9 72.23805555555556\n",
      "Matching number:  113 88.3001941986234 2584.7546106894315\n",
      "\n",
      "\n",
      "58\n",
      "['CNSHK', 'GRPIR'] 22.48578 113.86306200000001 37.971821999999996 23.616167\n",
      "中间港到达\n",
      "cur_label: 28\n",
      "565.9802777777778 442.21944444444443\n",
      "Matching number:  23 504.64445652173913 8529.49479350002\n",
      "\n",
      "\n",
      "59\n",
      "['CNSHK', 'IDJKT'] 22.452023 113.89433000000001 -6.119484 106.881179\n",
      "cur_label: 442\n",
      "201.46694444444444 98.83416666666666\n",
      "Matching number:  429 118.40747215747217 3251.6334093909877\n",
      "\n",
      "\n",
      "60\n",
      "['BRSSZ', 'CNSHK'] 19.017603 115.069142 22.559462 113.86305800000001\n",
      "中间港到达\n",
      "cur_label: 6\n",
      "279.1747222222222 260.7925\n",
      "Matching number:  6 276.1110185185185 411.74192476489975\n",
      "\n",
      "\n",
      "61\n",
      "['CNSHK', 'SGSIN', 'AEJEA', 'QAHMD', 'SADMM'] 1.23445 103.705062 26.479906 50.191041999999996\n",
      "中间港到达\n",
      "cur_label: 219\n",
      "456.12416666666667 287.5152777777778\n",
      "Matching number:  211 341.5866488941548 6361.120159345572\n",
      "\n",
      "\n",
      "62\n",
      "['CNSHK', 'MYTPP'] 8.681667 109.17833300000001 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 86\n",
      "361.83694444444444 33.18666666666667\n",
      "Matching number:  85 65.91672385620915 1018.8970222546635\n",
      "\n",
      "\n",
      "63\n",
      "['CNYTN', 'HRRIJ'] 22.566498 114.29127700000001 45.342132 14.427023000000002\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 44\n",
      "846.0313888888888 560.525\n",
      "Matching number:  42 615.2747420634921 8987.929262656278\n",
      "\n",
      "\n",
      "64\n",
      "['CNSHK', 'SGSIN'] 13.093067999999999 111.535923 1.3031409999999999 103.70461999999999\n",
      "cur_label: 33\n",
      "178.815 42.208888888888886\n",
      "Matching number:  32 57.72730902777778 1563.8281968532983\n",
      "\n",
      "\n",
      "65\n",
      "['CNYTN', 'GBFXT'] 36.905253 1.007217 51.963177 1.313896\n",
      "cur_label: 144\n",
      "268.03277777777777 90.3536111111111\n",
      "Matching number:  143 112.1513005050505 1673.4242481083443\n",
      "\n",
      "\n",
      "66\n",
      "['CNYTN', 'PAONX'] 22.36982 114.531287 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "837.5244444444445 502.5077777777778\n",
      "Matching number:  7 588.9891468253968 16163.838170442727\n",
      "\n",
      "\n",
      "67\n",
      "['CNSHK', 'JPTYO'] 22.455833 113.8987 35.616509 139.792252\n",
      "cur_label: 42\n",
      "191.51694444444445 105.89472222222223\n",
      "Matching number:  41 124.37458672086723 2899.8498077594318\n",
      "\n",
      "\n",
      "68\n",
      "['CNSHK', 'SGSIN'] 18.206183 112.277833 1.3031409999999999 103.70461999999999\n",
      "cur_label: 6\n",
      "64.36638888888889 60.3225\n",
      "Matching number:  6 63.69240740740741 2091.3236200033975\n",
      "\n",
      "\n",
      "69\n",
      "['CNSHK', 'SGSIN'] 22.307123 113.800995 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "886.0527777777778 72.85138888888889\n",
      "Matching number:  140 92.19526388888889 2568.047845489911\n",
      "\n",
      "\n",
      "70\n",
      "['CNSHK', 'KHKOS'] 22.449623000000003 113.89209699999999 10.888632000000001 103.400193\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 4\n",
      "80.64805555555556 80.64805555555556\n",
      "Matching number:  4 80.64805555555556 1698.1942119171422\n",
      "\n",
      "\n",
      "71\n",
      "['CNSHK', 'SGSIN', 'MTMLA', 'SIKOP'] 27.035172999999997 34.58225 45.537061 13.728527\n",
      "cur_label: 107\n",
      "302.52 105.135\n",
      "Matching number:  102 182.5086982570806 2762.399854507858\n",
      "\n",
      "\n",
      "72\n",
      "['CNYTN', 'ARENA'] 22.569505 114.266332 -34.627862 -58.35677\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "845.3255555555555 833.5625\n",
      "Matching number:  9 842.4057098765431 18492.015119786654\n",
      "\n",
      "\n",
      "73\n",
      "['CNYTN', 'PAONX'] 22.576393 114.285403 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16153.825598702286\n",
      "\n",
      "\n",
      "74\n",
      "['CNYTN', 'MXZLO'] 22.435075 114.477427 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "602.5813888888889 529.8311111111111\n",
      "Matching number:  33 576.8243308080807 13781.987021724728\n",
      "\n",
      "\n",
      "75\n",
      "['CNHKG', 'RUVVO'] 22.332998 114.119372 43.114035 131.882303\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 111\n",
      "414.4938888888889 196.3025\n",
      "Matching number:  109 255.55490316004077 2830.0685901771594\n",
      "\n",
      "\n",
      "76\n",
      "['CNNSA', 'SGSIN', 'AEJEA'] 22.63745 113.68783 25.022073000000002 55.04979\n",
      "cur_label: 29\n",
      "345.6769444444444 311.65277777777777\n",
      "Matching number:  29 315.7523898467433 5932.990957652719\n",
      "\n",
      "\n",
      "77\n",
      "['CNSHK', 'MYTPP', 'MUPLU', 'ZADUR'] 5.566023 97.627453 -29.868304 31.050079999999998\n",
      "cur_label: 12\n",
      "496.39666666666665 288.9138888888889\n",
      "Matching number:  11 406.7285101010101 8100.235812554547\n",
      "\n",
      "\n",
      "78\n",
      "['CNSHK', 'JPTYO'] 22.4563 113.8988 35.616509 139.792252\n",
      "cur_label: 42\n",
      "191.51694444444445 105.89472222222223\n",
      "Matching number:  41 124.37458672086723 2899.811101735836\n",
      "\n",
      "\n",
      "79\n",
      "['CNYTN', 'MXZLO'] 22.251767 114.983272 19.085960999999998 -104.305571\n",
      "cur_label: 30\n",
      "600.325 526.9841666666666\n",
      "Matching number:  30 567.6768935185185 13759.025521914427\n",
      "\n",
      "\n",
      "80\n",
      "['CNSHK', 'CLVAP'] 22.344963 113.959585 -33.030843 -71.642993\n",
      "cur_label: 9\n",
      "716.0797222222222 662.6594444444445\n",
      "Matching number:  9 681.2486574074073 18702.30051284406\n",
      "\n",
      "\n",
      "81\n",
      "['CNSHK', 'MYTPP'] 22.449425 113.880675 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 244\n",
      "540.5613888888889 82.23388888888888\n",
      "Matching number:  243 148.65499942844082 2583.5371575800623\n",
      "\n",
      "\n",
      "82\n",
      "['CNYTN', 'MYTPP', 'EGSUE', 'MATNG', 'MACAS'] 36.956367 12.418967 33.599299 -7.607741000000001\n",
      "cur_label: 53\n",
      "358.41027777777776 153.63888888888889\n",
      "Matching number:  52 203.9643108974359 1855.9835516829112\n",
      "\n",
      "\n",
      "83\n",
      "['CNYTN', 'ARENA'] 22.561943 114.27871 -34.627862 -58.35677\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "845.3255555555555 833.5625\n",
      "Matching number:  9 842.4057098765431 18491.81783682763\n",
      "\n",
      "\n",
      "84\n",
      "['CNSHK', 'SGSIN'] 22.450127 113.88043300000001 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2585.8703909473475\n",
      "\n",
      "\n",
      "85\n",
      "['CNSHK', 'KRINC'] 22.467668 113.87040800000001 37.401819 126.626175\n",
      "cur_label: 9\n",
      "100.69055555555556 63.8825\n",
      "Matching number:  9 82.28078703703704 2058.8144903627067\n",
      "\n",
      "\n",
      "86\n",
      "['CNYTN', 'MXZLO'] 22.571125 114.260438 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13787.192862603933\n",
      "\n",
      "\n",
      "87\n",
      "['CNSHK', 'SGSIN'] 13.093067999999999 111.535923 1.3031409999999999 103.70461999999999\n",
      "cur_label: 33\n",
      "178.815 42.208888888888886\n",
      "Matching number:  32 57.72730902777778 1563.8281968532983\n",
      "\n",
      "\n",
      "88\n",
      "['CNSHK', 'CLSAI'] -12.044703 -77.143 -33.578213 -71.609932\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 149\n",
      "246.13833333333332 83.5825\n",
      "Matching number:  147 122.9744671201814 2450.367862675573\n",
      "\n",
      "\n",
      "89\n",
      "['CNSHK', 'JPHKT'] 22.45165 113.89516699999999 33.660845 130.41038600000002\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "125.39722222222223 98.41361111111111\n",
      "Matching number:  9 106.89402777777778 2039.5114360843538\n",
      "\n",
      "\n",
      "90\n",
      "['CNSHK', 'THLCH'] 22.454617000000002 113.8972 13.079153 100.88608\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cur_label: 162\n",
      "170.23388888888888 82.88805555555555\n",
      "Matching number:  160 109.32891145833334 1724.5695600146864\n",
      "\n",
      "\n",
      "91\n",
      "['CNYTN', 'PAONX'] 22.564292000000002 114.284365 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16155.086912335788\n",
      "\n",
      "\n",
      "92\n",
      "['CNSHK', 'SGSIN'] 22.448867 113.89295800000001 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2586.323382917756\n",
      "\n",
      "\n",
      "93\n",
      "['CNSHK', 'INNSA'] 22.449017 113.881583 18.936253 72.93839\n",
      "cur_label: 321\n",
      "376.0975 248.90333333333334\n",
      "Matching number:  319 303.18454676071053 4270.460171378313\n",
      "\n",
      "\n",
      "94\n",
      "['CNYTN', 'PAONX'] 22.560305 114.32275200000001 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16153.830035368477\n",
      "\n",
      "\n",
      "95\n",
      "['CNSHK', 'SGSIN'] 22.38595 113.898995 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2580.3769373982614\n",
      "\n",
      "\n",
      "96\n",
      "['CNSHK', 'JPHKT'] 23.361783 117.924783 33.660845 130.41038600000002\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "107.64666666666666 78.76361111111112\n",
      "Matching number:  9 89.91134259259259 1669.6870971975247\n",
      "\n",
      "\n",
      "97\n",
      "['CNYTN', 'ESVAL'] 22.57015 114.2706 39.460366 -0.327021\n",
      "cur_label: 57\n",
      "666.2205555555555 516.75\n",
      "Matching number:  52 557.456217948718 10362.374479692293\n",
      "\n",
      "\n",
      "98\n",
      "['CNYTN', 'MXZLO'] 22.533995 114.39606200000001 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13780.214317476331\n",
      "\n",
      "\n",
      "99\n",
      "['CNSHK', 'SGSIN'] 22.349992999999998 114.036185 1.3031409999999999 103.70461999999999\n",
      "cur_label: 114\n",
      "214.9 72.23805555555556\n",
      "Matching number:  113 88.3001941986234 2583.2174009501255\n",
      "\n",
      "\n",
      "100\n",
      "['CNYTN', 'VNVUT', 'SGSIN', 'FRLEH'] 37.364843 4.050572 49.490140000000004 0.204605\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 6\n",
      "189.47444444444446 113.10194444444444\n",
      "Matching number:  6 124.07150462962963 1382.172978615402\n",
      "\n",
      "\n",
      "101\n",
      "['CNDCB', 'SGSIN'] 22.53275 113.85353300000001 1.3031409999999999 103.70461999999999\n",
      "cur_label: 34\n",
      "92.95 77.91638888888889\n",
      "Matching number:  34 82.88633578431373 2592.819536093948\n",
      "\n",
      "\n",
      "102\n",
      "['CNSHK', 'MYPKG'] 22.454532999999998 113.89791699999999 3.034709 101.361204\n",
      "cur_label: 258\n",
      "147.54861111111111 85.88555555555556\n",
      "Matching number:  258 101.93452088716623 2539.4870608750757\n",
      "\n",
      "\n",
      "103\n",
      "['CNDCB', 'SGSIN'] 22.53275 113.85353300000001 1.3031409999999999 103.70461999999999\n",
      "cur_label: 34\n",
      "92.95 77.91638888888889\n",
      "Matching number:  34 82.88633578431373 2592.819536093948\n",
      "\n",
      "\n",
      "104\n",
      "['CNSHK', 'SGSIN'] 22.4298 113.88361699999999 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2584.004961301667\n",
      "\n",
      "\n",
      "105\n",
      "['CNSHK', 'KRINC'] 22.465088 113.871623 37.401819 126.626175\n",
      "cur_label: 9\n",
      "100.69055555555556 63.8825\n",
      "Matching number:  9 82.28078703703704 2058.9831946544728\n",
      "\n",
      "\n",
      "106\n",
      "['CNYTN', 'MATNG'] 22.575843 114.280992 35.788207 -5.8129800000000005\n",
      "中间港到达\n",
      "cur_label: 178\n",
      "833.9730555555556 534.3838888888889\n",
      "Matching number:  177 626.2458066541117 10992.468931747391\n",
      "\n",
      "\n",
      "107\n",
      "['CNNSA', 'SGSIN', 'AEJEA'] 1.9862400000000002 104.76941 25.022073000000002 55.04979\n",
      "cur_label: 54\n",
      "275.15305555555557 231.115\n",
      "Matching number:  54 247.2949819958848 5907.515739721888\n",
      "\n",
      "\n",
      "108\n",
      "['CNSHK', 'MYTPP'] 22.311633 114.0891 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 198\n",
      "418.5083333333333 78.9625\n",
      "Matching number:  197 142.83750352509873 2579.8638389478106\n",
      "\n",
      "\n",
      "109\n",
      "['CNSHK', 'CLVAP'] 22.414904999999997 113.887018 -33.030843 -71.642993\n",
      "cur_label: 9\n",
      "716.0797222222222 662.6594444444445\n",
      "Matching number:  9 681.2486574074073 18712.34189500292\n",
      "\n",
      "\n",
      "110\n",
      "['CNYTN', 'SGSIN', 'MTMLA', 'DZALG'] 35.821433 14.534068 36.768454999999996 3.064607\n",
      "cur_label: 11\n",
      "566.5955555555555 109.04472222222222\n",
      "Matching number:  8 197.51916666666665 1034.9782093655876\n",
      "\n",
      "\n",
      "111\n",
      "['CNSHK', 'KRINC'] 22.458643 113.898175 37.401819 126.626175\n",
      "cur_label: 9\n",
      "100.69055555555556 63.8825\n",
      "Matching number:  9 82.28078703703704 2058.066284642999\n",
      "\n",
      "\n",
      "112\n",
      "['CNYTN', 'MXZLO'] 22.56677 114.2691 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13786.903304675276\n",
      "\n",
      "\n",
      "113\n",
      "['CNYTN', 'GRPIR'] 22.575307000000002 114.28618700000001 37.971821999999996 23.616167\n",
      "中间港到达\n",
      "cur_label: 96\n",
      "902.2302777777778 405.7966666666667\n",
      "Matching number:  93 476.6306033452808 8558.955123210295\n",
      "\n",
      "\n",
      "114\n",
      "['CNSHK', 'TRYAR'] 1.592495 104.592377 40.7739 29.748835999999997\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 123\n",
      "774.5241666666667 383.6286111111111\n",
      "Matching number:  122 454.421320582878 8621.488639444595\n",
      "\n",
      "\n",
      "115\n",
      "['CNYTN', 'MXZLO'] 22.116954999999997 115.361418 19.085960999999998 -104.305571\n",
      "cur_label: 31\n",
      "599.0063888888889 524.8880555555555\n",
      "Matching number:  31 565.2404345878138 13741.369906548169\n",
      "\n",
      "\n",
      "116\n",
      "['CNYTN', 'MXZLO'] 22.554722 114.34273799999998 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13782.48290339336\n",
      "\n",
      "\n",
      "117\n",
      "['CNYTN', 'CAVAN'] 51.231408 163.03223799999998 49.312003999999995 -123.103178\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 0\n",
      "Not Found!!!\n",
      "\n",
      "\n",
      "118\n",
      "['CNYTN', 'MXZLO'] 22.571097 114.262745 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13787.027184300448\n",
      "\n",
      "\n",
      "119\n",
      "['CNSHK', 'IDJKT'] 22.462007 113.89826299999999 -6.119484 106.881179\n",
      "cur_label: 442\n",
      "201.46694444444444 98.83416666666666\n",
      "Matching number:  429 118.40747215747217 3252.804963798543\n",
      "\n",
      "\n",
      "120\n",
      "['CNYTN', 'PAONX'] 22.56995 114.264852 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16155.359048168757\n",
      "\n",
      "\n",
      "121\n",
      "['CNSHK', 'ESVAL'] 5.938527 85.071348 39.460366 -0.327021\n",
      "cur_label: 7\n",
      "394.99944444444446 394.99944444444446\n",
      "Matching number:  7 394.9994444444445 9200.530030829252\n",
      "\n",
      "\n",
      "122\n",
      "['CNSHK', 'JPTYO'] 24.5162 118.0795 35.616509 139.792252\n",
      "cur_label: 11\n",
      "167.39583333333334 86.02361111111111\n",
      "Matching number:  10 92.16884722222221 2420.000946646335\n",
      "\n",
      "\n",
      "123\n",
      "['CNYTN', 'CAVAN'] 22.561817 114.2523 49.312003999999995 -123.103178\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 16\n",
      "664.6544444444445 469.4141666666667\n",
      "Matching number:  14 478.4084920634921 10241.24948549564\n",
      "\n",
      "\n",
      "124\n",
      "['CNYTN', 'NZAKL'] 19.915965 118.64251999999999 -36.844873 174.78561399999998\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 12\n",
      "314.3697222222222 309.19638888888886\n",
      "Matching number:  12 311.12266203703706 8611.969987905683\n",
      "\n",
      "\n",
      "125\n",
      "['CNYTN', 'BRSSZ'] -10.350442999999999 70.660108 -23.954513000000002 -46.28402\n",
      "cur_label: 18\n",
      "439.7266666666667 427.68888888888887\n",
      "Matching number:  18 429.25775462962963 12195.090877963092\n",
      "\n",
      "\n",
      "126\n",
      "['CNYTN', 'MTMLA'] 22.503954999999998 114.417278 35.896571 14.509457000000001\n",
      "中间港到达\n",
      "cur_label: 157\n",
      "1153.3805555555555 421.0988888888889\n",
      "Matching number:  156 539.1740322293448 9413.62583155516\n",
      "\n",
      "\n",
      "127\n",
      "['CNYTN', 'PAONX'] 22.57568 114.28203500000001 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16154.042484641945\n",
      "\n",
      "\n",
      "128\n",
      "['CNSHK', 'CLVAP'] 13.754114999999999 125.925653 -33.030843 -71.642993\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 11\n",
      "560.4077777777778 559.5941666666666\n",
      "Matching number:  11 560.2228661616161 17234.56113368988\n",
      "\n",
      "\n",
      "129\n",
      "['CNSHK', 'CLVAP'] 22.484017 113.86561699999999 -33.030843 -71.642993\n",
      "cur_label: 9\n",
      "716.0797222222222 662.6594444444445\n",
      "Matching number:  9 681.2486574074073 18720.25016382233\n",
      "\n",
      "\n",
      "130\n",
      "['CNSHK', 'KRINC'] 22.484588000000002 113.863422 37.401819 126.626175\n",
      "cur_label: 9\n",
      "100.69055555555556 63.8825\n",
      "Matching number:  9 82.28078703703704 2057.652799827734\n",
      "\n",
      "\n",
      "131\n",
      "['CNSHK', 'SGSIN'] 22.351468 113.93468 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "887.5636111111111 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2578.6237727097628\n",
      "\n",
      "\n",
      "132\n",
      "['CNYTN', 'CAVAN'] 22.568804999999998 114.26697 49.312003999999995 -123.103178\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 16\n",
      "664.6544444444445 469.4141666666667\n",
      "Matching number:  14 478.4084920634921 10239.773902609932\n",
      "\n",
      "\n",
      "133\n",
      "['CNYTN', 'MYTPP', 'EGSUE', 'MATNG', 'MACAS'] 5.977858 93.452112 33.599299 -7.607741000000001\n",
      "cur_label: 23\n",
      "646.8461111111111 448.3877777777778\n",
      "Matching number:  21 510.32168650793653 10664.690218712425\n",
      "\n",
      "\n",
      "134\n",
      "['CNSHK', 'SGSIN'] 22.633612 113.692627 1.3031409999999999 103.70461999999999\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "中间港到达\n",
      "cur_label: 187\n",
      "890.7188888888888 82.96055555555556\n",
      "Matching number:  185 134.09637612612613 2595.527836861185\n",
      "\n",
      "\n",
      "135\n",
      "['CNSHK', 'CLVAP'] 22.367102 113.911768 -33.030843 -71.642993\n",
      "cur_label: 9\n",
      "716.0797222222222 662.6594444444445\n",
      "Matching number:  9 681.2486574074073 18706.47429569851\n",
      "\n",
      "\n",
      "136\n",
      "['CNSHK', 'SGSIN'] 22.624562 113.697837 1.3031409999999999 103.70461999999999\n",
      "中间港到达\n",
      "cur_label: 187\n",
      "890.7188888888888 82.96055555555556\n",
      "Matching number:  185 134.09637612612613 2594.8618105738287\n",
      "\n",
      "\n",
      "137\n",
      "['CNSHK', 'THLCH'] 22.4614 113.898133 13.079153 100.88608\n",
      "cur_label: 162\n",
      "170.23388888888888 82.88805555555555\n",
      "Matching number:  160 109.32891145833334 1725.0771501214947\n",
      "\n",
      "\n",
      "138\n",
      "['CNSHK', 'JPTYO'] 22.451167 113.894333 35.616509 139.792252\n",
      "cur_label: 42\n",
      "191.51694444444445 105.89472222222223\n",
      "Matching number:  41 124.37458672086723 2900.5170962253364\n",
      "\n",
      "\n",
      "139\n",
      "['CNSHK', 'LKCMB'] 22.451332999999998 113.89593300000001 6.984864999999999 79.891802\n",
      "cur_label: 80\n",
      "386.2816666666667 163.35611111111112\n",
      "Matching number:  79 224.60385372714484 4026.581595101427\n",
      "\n",
      "\n",
      "140\n",
      "['CNYTN', 'MXZLO'] 22.554577 114.352447 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13781.786707098463\n",
      "\n",
      "\n",
      "141\n",
      "['CNNSA', 'SGSIN', 'AEJEA'] 17.426292999999998 66.7764 25.022073000000002 55.04979\n",
      "cur_label: 43\n",
      "72.70111111111112 57.022777777777776\n",
      "Matching number:  43 61.98288759689923 1478.3478721053748\n",
      "\n",
      "\n",
      "142\n",
      "['CNSHK', 'SGSIN'] 13.344204999999999 110.28633 1.3031409999999999 103.70461999999999\n",
      "cur_label: 7\n",
      "69.71916666666667 43.87277777777778\n",
      "Matching number:  7 66.0268253968254 1516.416852140993\n",
      "\n",
      "\n",
      "143\n",
      "['CNSHK', 'MYPKG'] 22.449667 113.89215 3.034709 101.361204\n",
      "cur_label: 258\n",
      "147.54861111111111 85.88555555555556\n",
      "Matching number:  258 101.93452088716623 2538.7084223587735\n",
      "\n",
      "\n",
      "144\n",
      "['CNSHK', 'IDJKT'] 22.462138 113.89875 -6.119484 106.881179\n",
      "cur_label: 442\n",
      "201.46694444444444 98.83416666666666\n",
      "Matching number:  429 118.40747215747217 3252.831489996731\n",
      "\n",
      "\n",
      "145\n",
      "['CNSHK', 'BZBZE'] 9.257433 -79.91805 17.507049 -88.19550600000001\n",
      "中间港到达\n",
      "cur_label: 10\n",
      "477.9736111111111 188.68805555555556\n",
      "Matching number:  8 257.9570138888889 1278.7455986861924\n",
      "\n",
      "\n",
      "146\n",
      "['CNYTN', 'GRPIR'] 11.846907 61.463359999999994 37.971821999999996 23.616167\n",
      "中间港到达\n",
      "cur_label: 46\n",
      "504.5075 180.81472222222223\n",
      "Matching number:  45 211.04790432098764 4740.647748390407\n",
      "\n",
      "\n",
      "147\n",
      "['CNSHK', 'MYPKG'] 22.452512 113.89430300000001 3.034709 101.361204\n",
      "cur_label: 258\n",
      "147.54861111111111 85.88555555555556\n",
      "Matching number:  258 101.93452088716623 2539.093529938577\n",
      "\n",
      "\n",
      "148\n",
      "['CNYTN', 'MATNG'] 22.564435 114.290705 35.788207 -5.8129800000000005\n",
      "中间港到达\n",
      "cur_label: 178\n",
      "833.9730555555556 534.3838888888889\n",
      "Matching number:  177 626.2458066541117 10994.067874395076\n",
      "\n",
      "\n",
      "149\n",
      "['CNSHK', 'CLVAP'] 22.479642000000002 113.86532700000001 -33.030843 -71.642993\n",
      "cur_label: 9\n",
      "716.0797222222222 662.6594444444445\n",
      "Matching number:  9 681.2486574074073 18719.815321492486\n",
      "\n",
      "\n",
      "150\n",
      "['CNSHK', 'PKQCT'] 22.459757999999997 113.87901799999999 24.766207 67.32748000000001\n",
      "cur_label: 18\n",
      "496.30055555555555 376.6191666666667\n",
      "Matching number:  17 398.01960784313724 4734.974655206617\n",
      "\n",
      "\n",
      "151\n",
      "['CNYTN', 'SGSIN'] 19.380678 113.7924 1.3031409999999999 103.70461999999999\n",
      "中间港到达\n",
      "cur_label: 74\n",
      "217.84944444444446 60.514722222222225\n",
      "Matching number:  73 90.45586567732116 2282.1730146994814\n",
      "\n",
      "\n",
      "152\n",
      "['CNSHK', 'IDJKT'] 22.452023 113.89433000000001 -6.119484 106.881179\n",
      "cur_label: 442\n",
      "201.46694444444444 98.83416666666666\n",
      "Matching number:  429 118.40747215747217 3251.6334093909877\n",
      "\n",
      "\n",
      "153\n",
      "['CNYTN', 'BDCGP'] 21.992717000000003 91.767112 22.304439000000002 91.79332099999999\n",
      "cur_label: 28\n",
      "201.63194444444446 26.960277777777776\n",
      "Matching number:  26 94.54897435897436 34.62340456228541\n",
      "\n",
      "\n",
      "154\n",
      "['CNSHK', 'MYTPP'] 22.114932999999997 114.24693300000001 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 200\n",
      "417.5761111111111 78.09333333333333\n",
      "Matching number:  199 141.81604201563374 2568.268393107765\n",
      "\n",
      "\n",
      "155\n",
      "['CNYTN', 'PAONX'] 22.557368 114.33578999999999 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16153.561202951869\n",
      "\n",
      "\n",
      "156\n",
      "['CNSHK', 'MYTPP'] 12.011283 109.924333 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 20\n",
      "284.3988888888889 42.464166666666664\n",
      "Matching number:  19 86.57186403508773 1368.6648476349167\n",
      "\n",
      "\n",
      "157\n",
      "['CNSHK', 'SIKOP'] 22.35424 113.92714699999999 45.537061 13.728527\n",
      "cur_label: 15\n",
      "591.7883333333333 502.0852777777778\n",
      "Matching number:  14 546.4932539682541 9020.379827611801\n",
      "\n",
      "\n",
      "158\n",
      "['CNYTN', 'MXZLO'] 22.572125 114.261563 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13787.032745662127\n",
      "\n",
      "\n",
      "159\n",
      "['CNYTN', 'PAONX'] 22.4751 114.44311499999999 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.0709523809523 16157.15415197651\n",
      "\n",
      "\n",
      "160\n",
      "['CNSHK', 'MYTPP'] 22.438717 113.88111699999999 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 244\n",
      "540.5613888888889 82.23388888888888\n",
      "Matching number:  243 148.65608996342021 2582.5021307364746\n",
      "\n",
      "\n",
      "161\n",
      "['CNYTN', 'NZAKL'] 5.589288 135.220402 -36.844873 174.78561399999998\n",
      "cur_label: 6\n",
      "316.71944444444443 225.54888888888888\n",
      "Matching number:  5 239.25394444444444 6242.643781691612\n",
      "\n",
      "\n",
      "162\n",
      "['CNYTN', 'VNVUT', 'SGSIN', 'FRLEH'] 12.334862 47.169157 49.490140000000004 0.204605\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 6\n",
      "386.4075 339.41194444444443\n",
      "Matching number:  6 381.44222222222226 5941.796682823364\n",
      "\n",
      "\n",
      "163\n",
      "['CNSHK', 'JPHKT'] 22.461682999999997 113.89911699999999 33.660845 130.41038600000002\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "125.39722222222223 98.41361111111111\n",
      "Matching number:  9 106.89402777777778 2038.474719568316\n",
      "\n",
      "\n",
      "164\n",
      "['CNSHK', 'GRPIR', 'ITSPE', 'ITGOA'] 27.402341999999997 34.213678 44.40565 8.946256\n",
      "cur_label: 8\n",
      "269.9411111111111 205.2736111111111\n",
      "Matching number:  8 250.70536458333333 2938.3798773417943\n",
      "\n",
      "\n",
      "165\n",
      "['CNYTN', 'GBFXT'] 5.879485 85.683133 51.963177 1.313896\n",
      "cur_label: 133\n",
      "590.9605555555555 387.41027777777776\n",
      "Matching number:  132 427.5364194023569 9110.48301921898\n",
      "\n",
      "\n",
      "166\n",
      "['CNSHK', 'MYTPP'] 22.443992 113.88115 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 244\n",
      "540.5613888888889 82.23388888888888\n",
      "Matching number:  243 148.65608996342021 2583.023743331865\n",
      "\n",
      "\n",
      "167\n",
      "['CNSHK', 'SGSIN'] 19.864382 112.74496699999999 1.3031409999999999 103.70461999999999\n",
      "cur_label: 22\n",
      "96.83333333333333 59.37277777777778\n",
      "Matching number:  22 90.20624368686867 2277.1024579425084\n",
      "\n",
      "\n",
      "168\n",
      "['CNSHK', 'SGSIN'] 22.346726999999998 114.02420500000001 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "887.1108333333333 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2582.3339513026676\n",
      "\n",
      "\n",
      "169\n",
      "['CNSHK', 'ZADUR'] 22.351563 114.03651299999999 -29.868304 31.050079999999998\n",
      "cur_label: 211\n",
      "654.6836111111111 460.395\n",
      "Matching number:  208 532.739140625 10585.493967130762\n",
      "\n",
      "\n",
      "170\n",
      "['CNSHK', 'SGSIN'] 22.346828 114.02548999999999 1.3031409999999999 103.70461999999999\n",
      "cur_label: 114\n",
      "214.9 72.23805555555556\n",
      "Matching number:  113 88.3001941986234 2582.404105561211\n",
      "\n",
      "\n",
      "171\n",
      "['CNYTN', 'PHBTG'] 22.569000000000003 114.263833 13.756824 121.046856\n",
      "cur_label: 67\n",
      "117.36111111111111 45.6975\n",
      "Matching number:  67 75.00896766169154 1210.3825005542542\n",
      "\n",
      "\n",
      "172\n",
      "['CNSHK', 'ZADUR'] -0.747323 86.580088 -29.868304 31.050079999999998\n",
      "cur_label: 42\n",
      "436.4066666666667 230.4286111111111\n",
      "Matching number:  41 320.271331300813 6691.735825608156\n",
      "\n",
      "\n",
      "173\n",
      "['CNSHK', 'MYTPP'] 11.232475 110.575148 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 32\n",
      "180.24 42.15222222222222\n",
      "Matching number:  31 74.89355734767025 1336.412427364801\n",
      "\n",
      "\n",
      "174\n",
      "['CNYTN', 'CVRAI'] 26.065048 -15.962615 14.919073000000001 -23.494260999999998\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 8\n",
      "57.33888888888889 52.856944444444444\n",
      "Matching number:  8 54.154687499999994 1461.9181776867526\n",
      "\n",
      "\n",
      "175\n",
      "['CNYTN', 'PAONX'] 22.562901999999998 114.31573700000001 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16153.872280625395\n",
      "\n",
      "\n",
      "176\n",
      "['CNSHK', 'KRINC'] 22.466352 113.87267800000001 37.401819 126.626175\n",
      "cur_label: 9\n",
      "100.69055555555556 63.8825\n",
      "Matching number:  9 82.28078703703704 2058.806519004467\n",
      "\n",
      "\n",
      "177\n",
      "['CNYTN', 'ARENA'] 22.5532 114.37735 -34.627862 -58.35677\n",
      "中间港到达\n",
      "cur_label: 9\n",
      "845.3255555555555 833.5625\n",
      "Matching number:  9 842.4057098765431 18495.347961732597\n",
      "\n",
      "\n",
      "178\n",
      "['CNYTN', 'PAONX'] 22.385428 114.50900700000001 9.352609 -79.88299\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cur_label: 8\n",
      "837.6294444444444 502.75444444444446\n",
      "Matching number:  7 589.1127579365079 16163.2550809039\n",
      "\n",
      "\n",
      "179\n",
      "['CNYTN', 'MXZLO'] 22.559882 114.316018 19.085960999999998 -104.305571\n",
      "cur_label: 33\n",
      "625.8738888888889 530.5641666666667\n",
      "Matching number:  32 577.3451085069445 13784.026131975834\n",
      "\n",
      "\n",
      "180\n",
      "['CNSHK', 'SGSIN'] 22.445617000000002 113.8905 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2585.888251717575\n",
      "\n",
      "\n",
      "181\n",
      "['CNSHK', 'MYTPP'] 22.46007 113.87864499999999 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 244\n",
      "540.5613888888889 82.23388888888888\n",
      "Matching number:  243 148.65499942844082 2584.4917369248333\n",
      "\n",
      "\n",
      "182\n",
      "['CNSHK', 'GRPIR', 'ITSPE', 'ITGOA'] 29.836035 32.570617999999996 44.40565 8.946256\n",
      "cur_label: 8\n",
      "259.4327777777778 194.21055555555554\n",
      "Matching number:  8 240.63503472222223 2633.3623561886357\n",
      "\n",
      "\n",
      "183\n",
      "['CNYTN', 'PHBTG'] 22.566927 114.26161299999998 13.756824 121.046856\n",
      "cur_label: 67\n",
      "117.36111111111111 45.6975\n",
      "Matching number:  67 75.00896766169154 1210.3392270005181\n",
      "\n",
      "\n",
      "184\n",
      "['CNYTN', 'MYTPP'] 22.560978 114.28738500000001 1.399416 103.545456\n",
      "中间港到达\n",
      "cur_label: 1152\n",
      "375.90555555555557 72.17861111111111\n",
      "Matching number:  1148 95.61993745160666 2613.8079227924045\n",
      "\n",
      "\n",
      "185\n",
      "['CNYTN', 'CAVAN'] 35.077382 128.792608 49.312003999999995 -123.103178\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 16\n",
      "512.57 293.4161111111111\n",
      "Matching number:  14 305.3099801587301 8286.076492183758\n",
      "\n",
      "\n",
      "186\n",
      "['CNSHA', 'SGSIN'] 30.643978000000004 122.028062 1.3031409999999999 103.70461999999999\n",
      "中间港到达\n",
      "cur_label: 12\n",
      "345.6927777777778 138.30388888888888\n",
      "Matching number:  11 206.48448232323233 3779.5752077486068\n",
      "\n",
      "\n",
      "187\n",
      "['CNSHK', 'SGSIN', 'AEJEA', 'QAHMD', 'SADMM', 'OMSOH'] 1.23445 103.705062 24.367309 56.737261\n",
      "cur_label: 19\n",
      "491.0647222222222 399.18583333333333\n",
      "Matching number:  16 428.28126736111113 5662.523440905766\n",
      "\n",
      "\n",
      "188\n",
      "['CNYTN', 'GBFXT'] 25.66091 35.523378 51.963177 1.313896\n",
      "cur_label: 156\n",
      "401.3227777777778 204.40666666666667\n",
      "Matching number:  155 240.32661648745517 4097.201913986357\n",
      "\n",
      "\n",
      "189\n",
      "['CNSHK', 'CLSAI'] 8.79829 -85.491527 -33.578213 -71.609932\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 5\n",
      "329.5527777777778 323.2738888888889\n",
      "Matching number:  5 328.297 4914.503728247314\n",
      "\n",
      "\n",
      "190\n",
      "['CNSHK', 'SGSIN', 'AEJEA', 'QAHMD', 'SADMM'] 22.4441 113.88326699999999 26.479906 50.191041999999996\n",
      "中间港到达\n",
      "cur_label: 213\n",
      "543.6455555555556 372.6361111111111\n",
      "Matching number:  205 440.92132859078595 6407.5166110341725\n",
      "\n",
      "\n",
      "191\n",
      "['CNYTN', 'PAONX'] 22.533607999999997 114.37783200000001 9.352609 -79.88299\n",
      "cur_label: 8\n",
      "858.3363888888889 503.17083333333335\n",
      "Matching number:  7 590.3283333333334 16154.125306941116\n",
      "\n",
      "\n",
      "192\n",
      "['CNSHK', 'SGSIN'] 22.454832999999997 113.89753300000001 1.3031409999999999 103.70461999999999\n",
      "cur_label: 141\n",
      "888.5216666666666 72.85138888888889\n",
      "Matching number:  140 92.2659880952381 2587.1250518251923\n",
      "\n",
      "\n",
      "193\n",
      "['CNNSA', 'CMKBI'] 6.14401 1.2874700000000001 2.939002 9.906216\n",
      "中间港到达\n",
      "匹配无路由的数据\n"
     ]
    }
   ],
   "source": [
    "# Revised version: uses groupby('TRANSPORT_TRACE')\n",
    "gc.collect()\n",
    "# Tuning constants for matching(); the values are the ones previously hard-coded in its body.\n",
    "_START_TOL_DEG = 0.15        # inclusive lon/lat offset (degrees) for a GPS fix to count as the start position\n",
    "_END_TOL_DEG = 0.2           # exclusive lon/lat offset (degrees) for a GPS fix to count as the destination\n",
    "_MIN_MATCHES = 5             # fewer matches than this triggers the next, looser matching stage\n",
    "_FALLBACK_MAX_MATCHES = 10   # the last stage stops scanning routes once this many matches exist\n",
    "_SPREAD_TRIGGER_HOURS = 80   # trim the longest durations when max - min reaches this many hours\n",
    "_TRIM_WINDOW_HOURS = 5       # durations within this many hours of the maximum are trimmed\n",
    "\n",
    "# Routes that are rewritten before being compared with the training routes.\n",
    "_TRACE_ALIASES = {\n",
    "    ('HUNGARY', 'HON'): ['HUNGARY', 'HONGKONG'],\n",
    "    ('LTKLJ', 'DEHAM', 'HKHKG'): ['LTKLJ', 'DEHAM', 'HONG KONG_HONG KONG'],\n",
    "    ('CNSHK', 'SGSIN', 'MTMLA', 'DZALG'): ['CNYTN', 'SGSIN', 'MTMLA', 'DZALG'],\n",
    "}\n",
    "\n",
    "\n",
    "def _first_fix_near_start(group, start_lon, start_lat):\n",
    "    \"\"\"Return the index label of the first row within _START_TOL_DEG of the start point, or None.\"\"\"\n",
    "    for j in group.index:\n",
    "        if (abs(group.loc[j, 'longitude'] - start_lon) <= _START_TOL_DEG and\n",
    "                abs(group.loc[j, 'latitude'] - start_lat) <= _START_TOL_DEG):\n",
    "            return j\n",
    "    return None\n",
    "\n",
    "\n",
    "def _first_moving_fix_near_end(group, labels, end_lon, end_lat):\n",
    "    \"\"\"Return the first label in `labels` whose row is within _END_TOL_DEG of the end point and has speed > 0, or None.\"\"\"\n",
    "    for j in labels:\n",
    "        if (abs(group.loc[j, 'longitude'] - end_lon) < _END_TOL_DEG and\n",
    "                abs(group.loc[j, 'latitude'] - end_lat) < _END_TOL_DEG and\n",
    "                group.loc[j, 'speed'] > 0):\n",
    "            return j\n",
    "    return None\n",
    "\n",
    "\n",
    "def _collect_matches(group1, test_order, start_pos, end_pos, index_arr, cur_label, scan_forward):\n",
    "    \"\"\"Append one match per historical voyage in `group1` that passes both the start and end points.\n",
    "\n",
    "    `group1` is split by 'loadingOrder'. For every voyage that has a row near `start_pos`\n",
    "    (lon, lat), the rows are scanned from last to first for a moving row near `end_pos`\n",
    "    (lon, lat); when `scan_forward` is true they are additionally scanned from first to last,\n",
    "    so one voyage can contribute two matches. Each match appends\n",
    "    [loadingOrder, start label, end label, test_order] to `index_arr` and the absolute\n",
    "    duration in hours between the two rows to `cur_label`.\n",
    "    \"\"\"\n",
    "    start_lon, start_lat = start_pos\n",
    "    end_lon, end_lat = end_pos\n",
    "    for name, group in group1.groupby('loadingOrder'):\n",
    "        start_index = _first_fix_near_start(group, start_lon, start_lat)\n",
    "        if start_index is None:\n",
    "            # This voyage has no row near the test order's current position.\n",
    "            continue\n",
    "        start_time = group.loc[start_index, 'timestamp']\n",
    "        scans = [group.index[::-1]]\n",
    "        if scan_forward:\n",
    "            scans.append(group.index)\n",
    "        for labels in scans:\n",
    "            j = _first_moving_fix_near_end(group, labels, end_lon, end_lat)\n",
    "            if j is None:\n",
    "                continue\n",
    "            index_arr.append([name, start_index, j, test_order])\n",
    "            time_gap = abs((group.loc[j, 'timestamp'] - start_time).total_seconds() / 3600)\n",
    "            cur_label.append(time_gap)\n",
    "\n",
    "\n",
    "def matching(test_trace, port_data):\n",
    "    \"\"\"Label every still-unlabeled test order with a duration (hours) taken from matching historical voyages.\n",
    "\n",
    "    For each index label i of `test_trace` with test_label[i] == 0, historical voyages from the\n",
    "    global `train_data` are searched in up to three stages, each one running only while fewer than\n",
    "    _MIN_MATCHES durations have been collected:\n",
    "      1. training routes whose node list equals the test route;\n",
    "      2. training routes containing the test route's first node before its second node;\n",
    "      3. remaining training routes that contain the test route's last node or have fewer than two\n",
    "         nodes, stopping once _FALLBACK_MAX_MATCHES durations exist.\n",
    "    The label written to test_label[i] is the average of the mean and the median of the collected\n",
    "    durations, after optionally trimming the longest ones (see the inline comment below).\n",
    "\n",
    "    Reads the notebook globals `train_data`, `test_label`, `indexes` and `distance`; writes\n",
    "    test_label[i] and appends the matched [loadingOrder, start label, end label, test order]\n",
    "    records to `indexes`. Progress is printed to stdout.\n",
    "\n",
    "    Args:\n",
    "        test_trace: DataFrame with 'TRANSPORT_TRACE' ('-'-separated node names), 'latitude',\n",
    "            'longitude' and 'loadingOrder' columns; its index labels are used to subscript test_label.\n",
    "        port_data: DataFrame with a 'TRANS_NODE_NAME' column; for the destination node the values\n",
    "            at column positions 1 and 2 of its first row are read as longitude and latitude.\n",
    "\n",
    "    Returns:\n",
    "        None.\n",
    "    \"\"\"\n",
    "    # NOTE(review): rows of each loadingOrder are scanned in stored order; presumably train_data\n",
    "    # is already sorted by timestamp within each order - confirm against the loading cell.\n",
    "    train_group_1 = train_data.groupby('TRANSPORT_TRACE')\n",
    "    port_group = port_data.groupby('TRANS_NODE_NAME')\n",
    "    print('start matching!!!')\n",
    "    for i in test_trace.index:\n",
    "        if test_label[i] != 0:\n",
    "            # Only rows whose label is still 0 are processed.\n",
    "            continue\n",
    "        trace = test_trace.loc[i, 'TRANSPORT_TRACE'].split('-')\n",
    "        print(i)\n",
    "        test_order = test_trace.loc[i, 'loadingOrder']\n",
    "        start_lat = test_trace.loc[i, 'latitude']\n",
    "        start_lon = test_trace.loc[i, 'longitude']\n",
    "        # The destination is looked up with the route's last node as written in the test data,\n",
    "        # i.e. before the alias rewrite below.\n",
    "        dest_port = port_group.get_group(trace[-1])\n",
    "        end_lat = dest_port.iloc[0, 2]\n",
    "        end_lon = dest_port.iloc[0, 1]\n",
    "        print(trace, start_lat, start_lon, end_lat, end_lon)\n",
    "        trace = list(_TRACE_ALIASES.get(tuple(trace), trace))\n",
    "\n",
    "        start_pos = (start_lon, start_lat)\n",
    "        end_pos = (end_lon, end_lat)\n",
    "        matched_traces = set()   # TRANSPORT_TRACE keys already searched in stages 1 and 2\n",
    "        cur_label = []           # matched durations in hours\n",
    "        index_arr = []           # [loadingOrder, start label, end label, test order] per duration\n",
    "\n",
    "        # Stage 1: training routes identical to the test route.\n",
    "        for name1, group1 in train_group_1:\n",
    "            if str(name1).split('-') == trace:\n",
    "                matched_traces.add(name1)\n",
    "                _collect_matches(group1, test_order, start_pos, end_pos,\n",
    "                                 index_arr, cur_label, scan_forward=False)\n",
    "\n",
    "        # Stage 2: routes that visit the test route's first node before its second node.\n",
    "        if len(cur_label) < _MIN_MATCHES:\n",
    "            print('中间港到达')\n",
    "            for name1, group1 in train_group_1:\n",
    "                cur_trace = str(name1).split('-')\n",
    "                if (trace[0] in cur_trace and trace[1] in cur_trace and\n",
    "                        cur_trace.index(trace[0]) < cur_trace.index(trace[1])):\n",
    "                    matched_traces.add(name1)\n",
    "                    _collect_matches(group1, test_order, start_pos, end_pos,\n",
    "                                     index_arr, cur_label, scan_forward=True)\n",
    "\n",
    "        # Stage 3: any other route that contains the destination node or has fewer than two nodes.\n",
    "        if len(cur_label) < _MIN_MATCHES:\n",
    "            print('匹配无路由的数据')\n",
    "            for name1, group1 in train_group_1:\n",
    "                # Skip routes already searched above. This used to test the stale inner-loop\n",
    "                # variable `name` (a loadingOrder), which never matched a route key and raised\n",
    "                # UnboundLocalError when no voyage had been iterated yet.\n",
    "                if name1 in matched_traces:\n",
    "                    continue\n",
    "                if len(cur_label) >= _FALLBACK_MAX_MATCHES:\n",
    "                    break\n",
    "                cur_trace = str(name1).split('-')\n",
    "                if trace[-1] in cur_trace or len(cur_trace) < 2:\n",
    "                    _collect_matches(group1, test_order, start_pos, end_pos,\n",
    "                                     index_arr, cur_label, scan_forward=True)\n",
    "\n",
    "        print('cur_label:', len(cur_label))\n",
    "        if len(cur_label) == 0:\n",
    "            print('Not Found!!!')\n",
    "            print('\\n')\n",
    "            continue\n",
    "\n",
    "        max_label = np.max(cur_label)\n",
    "        min_label = np.min(cur_label)\n",
    "        print(max_label, min_label)\n",
    "        kept = list(range(len(cur_label)))\n",
    "        if max_label - min_label >= _SPREAD_TRIGGER_HOURS:\n",
    "            # Wide spread: drop the durations close to the maximum, but only when more than\n",
    "            # one duration survives; otherwise fall back to the full set.\n",
    "            trimmed = [k for k in kept\n",
    "                       if not abs(cur_label[k] - max_label) <= _TRIM_WINDOW_HOURS]\n",
    "            if len(trimmed) > 1:\n",
    "                kept = trimmed\n",
    "        used_labels = [cur_label[k] for k in kept]\n",
    "        for k in kept:\n",
    "            indexes.append(index_arr[k])\n",
    "        test_label[i] = (np.mean(np.array(used_labels)) + np.median(np.array(used_labels))) / 2\n",
    "        print('Matching number: ', len(used_labels), test_label[i],\n",
    "              distance((start_lat, start_lon), (end_lat, end_lon)).km)\n",
    "        print('\\n')\n",
    "\n",
    "# Load the port table, coerce both coordinate columns to float, then label the unmatched test orders.\n",
    "port_data = pd.read_csv(port_data_path, usecols = [0, 1, 2, 8])\n",
    "for coord_col in ('LONGITUDE', 'LATITUDE'):\n",
    "    port_data[coord_col] = pd.to_numeric(port_data[coord_col]).astype(float)\n",
    "matching(test_trace, port_data)\n",
    "test_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "start matching!!!\n",
      "174\n",
      "['CNSHK', 'INNSA', 'LKCMB', 'AEJEA', 'AEAUH', 'KWKWI'] 15.532677 70.487358 29.045214 48.154646\n",
      "中间港到达\n",
      "cur_label: 5\n",
      "382.67527777777775 257.73\n",
      "Matching number:  4 282.3073263888889 2736.5647516278273\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([346.51371528, 573.11440359, 565.33141577, 875.69463889,\n",
       "       577.34510851, 707.38746212, 575.61265   , 611.7420508 ,\n",
       "       854.31114583, 558.23244658, 488.84276042, 200.1551929 ,\n",
       "       577.34510851, 361.37426136, 308.87958689, 680.09780864,\n",
       "       558.77422138, 606.02342593, 424.71443507, 744.21889815,\n",
       "       590.32833333, 575.9772096 , 892.83956019, 590.32833333,\n",
       "       576.82433081, 576.82433081, 681.24865741, 681.24865741,\n",
       "       590.07095238, 681.24865741, 590.32833333, 577.34510851,\n",
       "       590.32833333, 589.26789683, 540.92641204, 577.34510851,\n",
       "       590.32833333, 589.26503968, 590.32833333, 681.24865741,\n",
       "       681.24865741, 590.32833333, 577.08277778, 577.08277778,\n",
       "       548.84947222, 590.32833333, 590.07095238, 589.26789683,\n",
       "       577.34510851, 590.32833333, 546.49325397, 546.49325397,\n",
       "       590.32833333, 398.01960784, 680.17391204, 101.42987817,\n",
       "       577.34510851,  92.2659881 , 577.34510851, 148.66289952,\n",
       "       577.34510851,  26.7939115 , 577.34510851, 577.34510851,\n",
       "        92.2659881 , 722.5775    , 842.40570988,  83.82221955,\n",
       "       148.66289952, 148.66289952,  87.37236483,  86.67597468,\n",
       "       466.14197917, 127.98087317,  92.2659881 ,  92.2659881 ,\n",
       "       592.04763889, 577.34510851, 148.66361968,  87.71710297,\n",
       "         2.98132737, 574.77648569, 574.77648569,  92.2659881 ,\n",
       "        92.2659881 , 148.66289952,  92.2659881 , 148.67956048,\n",
       "       143.12496898,  17.38039886,  92.2659881 , 842.40570988,\n",
       "        92.2659881 , 842.40570988,  88.3001942 ,  92.2659881 ,\n",
       "        86.27167636, 141.99131703, 141.99131703, 842.40570988,\n",
       "       148.66289952, 148.66289952, 318.31864644,  84.25787124,\n",
       "        92.2659881 ,  92.2659881 ,  92.2659881 , 148.66215878,\n",
       "       148.66215878, 577.34510851, 504.66199275, 478.40849206,\n",
       "       478.40849206,  92.2659881 ,  92.2659881 ,  92.2659881 ,\n",
       "       577.34510851, 577.34510851,  92.2659881 ,  92.2659881 ,\n",
       "        92.2659881 , 478.40849206,  92.2659881 ,  92.2659881 ,\n",
       "        92.2659881 , 621.55015625, 430.52473611, 719.06332071,\n",
       "       378.75859127, 384.92972222, 408.55591564, 697.74626157,\n",
       "       707.65512731, 664.73467593, 551.90443866, 429.61180556,\n",
       "       464.76263889, 376.8937037 , 566.771     , 455.94967014,\n",
       "       448.59613426, 479.19293981, 380.93697338, 470.63      ,\n",
       "       410.72135031, 554.75431713, 631.02180556, 192.40785354,\n",
       "       621.17630556, 627.42796296,  15.1545463 , 112.6170101 ,\n",
       "       427.54611111, 374.54204457,   4.66297222, 523.97409323,\n",
       "        80.76090278,  10.30147222, 124.07150463, 202.75100505,\n",
       "        26.83478819,  54.31680556,  75.91666667, 190.66097222,\n",
       "        94.74114316, 251.37512153, 364.07805556,  28.442423  ,\n",
       "       156.11380556, 151.32628502, 215.02463542,  61.89465856,\n",
       "       237.08240741,  17.68572222, 282.30732639,  62.2325    ,\n",
       "       244.52265432, 197.72005208, 197.72005208, 590.64417259,\n",
       "       224.60688994, 247.71366162,  92.2659881 ,  52.40259259,\n",
       "       185.30969444, 124.37458672, 303.18520202,  82.28078704,\n",
       "        75.00896766, 483.59318627, 101.93527347,  80.64805556,\n",
       "        82.28078704, 106.89402778,  82.88633578, 118.60030882,\n",
       "       109.32891146, 101.93527347,  82.28078704, 100.95199561])"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Revised version: walk the training data with groupby('TRANSPORT_TRACE')\n",
    "# Run a garbage-collection pass before (re)defining and running the matcher.\n",
    "gc.collect()\n",
    "def _match_voyage(name, group, test_order, start_lon, start_lat, end_lon, end_lat,\n",
    "                  start_tol, end_tol, use_start_lat, scan_forward, announce,\n",
    "                  index_arr, cur_label):\n",
    "    \"\"\"Collect travel-time samples from one training voyage.\n",
    "\n",
    "    The voyage is used only if it contains a row whose longitude (and, when\n",
    "    ``use_start_lat`` is true, latitude) is within ``start_tol`` of the test\n",
    "    position; the first such row is the reference start. Arrival rows are rows\n",
    "    strictly within ``end_tol`` of the destination on both axes with\n",
    "    ``speed > 0``. The last arrival row is always recorded; when\n",
    "    ``scan_forward`` is true the first arrival row is recorded as well, so a\n",
    "    voyage with arrival rows then contributes two samples.\n",
    "\n",
    "    Args:\n",
    "        name: loadingOrder key of the training voyage.\n",
    "        group: rows of that voyage; reads the columns longitude, latitude,\n",
    "            speed and timestamp.\n",
    "        test_order: loadingOrder of the test row being matched.\n",
    "        start_lon, start_lat: position of the test row.\n",
    "        end_lon, end_lat: position of the destination port.\n",
    "        start_tol: inclusive tolerance around the start position.\n",
    "        end_tol: exclusive tolerance around the destination.\n",
    "        use_start_lat: also require the latitude to be near the start.\n",
    "        scan_forward: also record the first arrival row.\n",
    "        announce: print 'true' for every recorded sample.\n",
    "        index_arr: output list; receives\n",
    "            ``[name, start_index, arrival_index, test_order]`` per sample.\n",
    "        cur_label: output list; receives the absolute start-to-arrival gap\n",
    "            in hours per sample.\n",
    "    \"\"\"\n",
    "    # Work in float64 regardless of the dtype the columns are stored in.\n",
    "    lon = group['longitude'].values.astype(float)\n",
    "    lat = group['latitude'].values.astype(float)\n",
    "\n",
    "    near_start = np.abs(lon - start_lon) <= start_tol\n",
    "    if use_start_lat:\n",
    "        near_start &= np.abs(lat - start_lat) <= start_tol\n",
    "    start_hits = np.flatnonzero(near_start)\n",
    "    if start_hits.size == 0:\n",
    "        return\n",
    "    start_pos = start_hits[0]\n",
    "    start_index = group.index[start_pos]\n",
    "    start_time = group['timestamp'].iloc[start_pos]\n",
    "\n",
    "    arrived = ((np.abs(lon - end_lon) < end_tol)\n",
    "               & (np.abs(lat - end_lat) < end_tol)\n",
    "               & (group['speed'].values > 0))\n",
    "    end_hits = np.flatnonzero(arrived)\n",
    "    if end_hits.size == 0:\n",
    "        return\n",
    "\n",
    "    # Last arrival row first, then (optionally) the first arrival row.\n",
    "    arrival_positions = [end_hits[-1], end_hits[0]] if scan_forward else [end_hits[-1]]\n",
    "    for pos in arrival_positions:\n",
    "        if announce:\n",
    "            print('true')\n",
    "        index_arr.append([name, start_index, group.index[pos], test_order])\n",
    "        time_gap = abs((group['timestamp'].iloc[pos] - start_time).total_seconds() / 3600)\n",
    "        cur_label.append(time_gap)\n",
    "\n",
    "\n",
    "def matching(test_trace, port_data):\n",
    "    \"\"\"Fill the still-zero entries of the global ``test_label`` from training voyages.\n",
    "\n",
    "    For every row of ``test_trace`` whose ``test_label`` entry is 0, travel-time\n",
    "    samples (hours) are gathered from the global ``train_data`` in up to three\n",
    "    passes:\n",
    "\n",
    "    1. routes whose TRANSPORT_TRACE equals the test trace (always run);\n",
    "    2. routes containing the first two ports of the test trace in that order\n",
    "       (run while fewer than two samples exist);\n",
    "    3. every route not scanned by passes 1-2, regardless of its trace (run\n",
    "       while fewer than two samples exist; no new route is started once five\n",
    "       samples exist).\n",
    "\n",
    "    If the samples span at least 100 hours, those within 10 hours of the\n",
    "    maximum are discarded, provided at least two samples remain. The estimate\n",
    "    ``(mean + median) / 2`` is written to ``test_label[i]`` and the match\n",
    "    records are appended to the global list ``indexes``. Progress is printed.\n",
    "\n",
    "    Args:\n",
    "        test_trace: frame with the columns TRANSPORT_TRACE, latitude,\n",
    "            longitude and loadingOrder.\n",
    "        port_data: frame with a TRANS_NODE_NAME column; the columns at\n",
    "            positions 1 and 2 are read as the port's longitude and latitude.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if the last port of a trace has no row in ``port_data``.\n",
    "    \"\"\"\n",
    "    #train_data.sort_values(['loadingOrder', 'timestamp'], inplace = True)\n",
    "    train_group_1 = train_data.groupby('TRANSPORT_TRACE')\n",
    "    port_group = port_data.groupby('TRANS_NODE_NAME')\n",
    "    # Trace rewrites applied before comparing against training routes\n",
    "    # (presumably the spellings used in the training data - TODO confirm).\n",
    "    trace_aliases = {\n",
    "        ('HUNGARY', 'HON'): ['HUNGARY', 'HONGKONG'],\n",
    "        ('LTKLJ', 'DEHAM', 'HKHKG'): ['LTKLJ', 'DEHAM', 'HONG KONG_HONG KONG'],\n",
    "        ('CNSHK', 'SGSIN', 'MTMLA', 'DZALG'): ['CNYTN', 'SGSIN', 'MTMLA', 'DZALG'],\n",
    "    }\n",
    "    print('start matching!!!')\n",
    "    for i in test_trace.index:\n",
    "        if test_label[i] != 0:\n",
    "            continue\n",
    "        print(i)\n",
    "        trace = test_trace.loc[i, 'TRANSPORT_TRACE'].split('-')\n",
    "        cur_order = []   # TRANSPORT_TRACE keys scanned by passes 1 and 2\n",
    "        cur_label = []   # travel-time samples in hours\n",
    "        index_arr = []   # one match record per sample, parallel to cur_label\n",
    "        start_lat = test_trace.loc[i, 'latitude']\n",
    "        start_lon = test_trace.loc[i, 'longitude']\n",
    "        # The destination is looked up under the test-set port name, i.e.\n",
    "        # before the aliases below are applied.\n",
    "        end_port = port_group.get_group(trace[-1])\n",
    "        end_lat = end_port.iloc[0, 2]\n",
    "        end_lon = end_port.iloc[0, 1]\n",
    "        print(trace, start_lat, start_lon, end_lat, end_lon)\n",
    "        trace = trace_aliases.get(tuple(trace), trace)\n",
    "\n",
    "        common = dict(test_order=test_trace.loc[i, 'loadingOrder'],\n",
    "                      start_lon=start_lon, start_lat=start_lat,\n",
    "                      end_lon=end_lon, end_lat=end_lat,\n",
    "                      index_arr=index_arr, cur_label=cur_label)\n",
    "\n",
    "        # Pass 1: identical route.\n",
    "        # NOTE(review): passes 1 and 2 compare only the longitude with the\n",
    "        # start position; pass 3 also compares the latitude - confirm intent.\n",
    "        for name1, group1 in train_group_1:\n",
    "            if str(name1).split('-') != trace:\n",
    "                continue\n",
    "            cur_order.append(name1)\n",
    "            for name, group in group1.groupby('loadingOrder'):\n",
    "                _match_voyage(name, group, start_tol=0.1, end_tol=0.4,\n",
    "                              use_start_lat=False, scan_forward=False,\n",
    "                              announce=False, **common)\n",
    "\n",
    "        # Pass 2: routes visiting the first two ports of the trace in order.\n",
    "        if len(cur_label) < 2:\n",
    "            print('中间港到达')\n",
    "            for name1, group1 in train_group_1:\n",
    "                cur_trace = str(name1).split('-')\n",
    "                # A single-port trace has no second port and matches nothing here.\n",
    "                in_order = (len(trace) > 1\n",
    "                            and trace[0] in cur_trace and trace[1] in cur_trace\n",
    "                            and cur_trace.index(trace[0]) < cur_trace.index(trace[1]))\n",
    "                if not in_order:\n",
    "                    continue\n",
    "                cur_order.append(name1)\n",
    "                for name, group in group1.groupby('loadingOrder'):\n",
    "                    _match_voyage(name, group, start_tol=0.1, end_tol=0.2,\n",
    "                                  use_start_lat=False, scan_forward=True,\n",
    "                                  announce=False, **common)\n",
    "\n",
    "        # Pass 3: any remaining route, matched on position only.\n",
    "        if len(cur_label) < 2:\n",
    "            print('匹配无路由的数据')\n",
    "            for name1, group1 in train_group_1:\n",
    "                # Skip routes already scanned above. The key to test is the\n",
    "                # route name (name1), not the last loadingOrder seen.\n",
    "                if name1 in cur_order:\n",
    "                    continue\n",
    "                if len(cur_label) >= 5:\n",
    "                    break\n",
    "                for name, group in group1.groupby('loadingOrder'):\n",
    "                    _match_voyage(name, group, start_tol=0.25, end_tol=0.25,\n",
    "                                  use_start_lat=True, scan_forward=True,\n",
    "                                  announce=True, **common)\n",
    "\n",
    "        print(\"cur_label:\", len(cur_label))\n",
    "        if len(cur_label) == 0:\n",
    "            print('Not Found!!!')\n",
    "            print('\\n')\n",
    "            continue\n",
    "\n",
    "        max_label = np.max(cur_label)\n",
    "        min_label = np.min(cur_label)\n",
    "        print(max_label, min_label)\n",
    "        kept_label, kept_index = cur_label, index_arr\n",
    "        if max_label - min_label >= 100:\n",
    "            # Wide spread: drop the samples within 10 h of the maximum, but\n",
    "            # only if at least two samples survive the cut.\n",
    "            keep = [k for k in range(len(cur_label))\n",
    "                    if not abs(cur_label[k] - max_label) <= 10]\n",
    "            if len(keep) > 1:\n",
    "                kept_label = [cur_label[k] for k in keep]\n",
    "                kept_index = [index_arr[k] for k in keep]\n",
    "\n",
    "        for record in kept_index:\n",
    "            indexes.append(record)\n",
    "        test_label[i] = (np.mean(np.array(kept_label)) + np.median(np.array(kept_label))) / 2\n",
    "        #print(kept_label)\n",
    "        print('Matching number: ', len(kept_label), test_label[i],\n",
    "              distance((start_lat, start_lon), (end_lat, end_lon)).km)\n",
    "        print('\\n')\n",
    "\n",
    "# Reload the port table (file columns 0, 1, 2 and 8) and coerce both\n",
    "# coordinate columns to float in a single chained step.\n",
    "port_data = pd.read_csv(port_data_path, usecols = [0, 1, 2, 8]).assign(\n",
    "    LONGITUDE = lambda ports: pd.to_numeric(ports['LONGITUDE']).astype(float),\n",
    "    LATITUDE = lambda ports: pd.to_numeric(ports['LATITUDE']).astype(float),\n",
    ")\n",
    "# Fill the remaining zero labels, then display the label array.\n",
    "matching(test_trace, port_data)\n",
    "test_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "start matching!!!\n",
      "18\n",
      "['CNSHK', 'ESALG'] 1.271177 103.903023 36.142025 -5.412837000000001\n",
      "中间港到达\n",
      "匹配无路由的数据\n",
      "cur_label: 1174\n",
      "857.2769444444444 372.30083333333334\n",
      "Matching number:  1172 424.3826089069018 11652.95652714022\n",
      "\n",
      "\n",
      "177\n",
      "['CNSHK', 'SGSIN', 'MTMLA', 'DZALG'] 35.821509999999996 14.534182000000001 36.768454999999996 3.064607\n",
      "cur_label: 11\n",
      "566.7038888888889 109.30083333333333\n",
      "Matching number:  8 197.72005208333334 1034.9870115336334\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([351.53032407, 573.11440359, 565.33141577, 875.69463889,\n",
       "       578.09843434, 707.38746212, 575.61265   , 611.7420508 ,\n",
       "       854.31114583, 558.23244658, 488.84276042, 200.1551929 ,\n",
       "       578.09843434, 361.37426136, 304.69225108, 680.09780864,\n",
       "       558.77422138, 644.74375   , 424.38260891, 713.8974359 ,\n",
       "       590.32833333, 575.9772096 , 892.83956019, 590.32833333,\n",
       "       576.82433081, 576.82433081, 681.24865741, 681.24865741,\n",
       "       590.07095238, 681.24865741, 590.32833333, 578.09843434,\n",
       "       590.32833333, 589.26789683, 538.44944444, 578.09843434,\n",
       "       590.32833333, 589.26503968, 590.32833333, 681.24865741,\n",
       "       681.24865741, 590.32833333, 577.08277778, 577.08277778,\n",
       "       548.84947222, 590.32833333, 590.07095238, 589.26789683,\n",
       "       578.09843434, 590.32833333, 548.70944444, 548.70944444,\n",
       "       590.32833333, 398.01960784, 680.17391204, 101.42987817,\n",
       "       578.09843434,  92.2659881 , 578.09843434, 148.66289952,\n",
       "       578.09843434,  26.7939115 , 578.09843434, 578.09843434,\n",
       "        92.2659881 , 722.5775    , 842.40570988,  83.82221955,\n",
       "       148.66289952, 148.66289952,  87.37236483,  86.67597468,\n",
       "       466.14197917, 127.98087317,  92.2659881 ,  92.2659881 ,\n",
       "       592.04763889, 578.09843434, 148.66361968,  87.71710297,\n",
       "         2.98132737, 574.77648569, 574.77648569,  92.2659881 ,\n",
       "        92.2659881 , 148.66289952,  92.2659881 , 148.67956048,\n",
       "       143.12496898,  17.38039886,  92.2659881 , 842.40570988,\n",
       "        92.2659881 , 842.40570988,  88.3001942 ,  92.2659881 ,\n",
       "        86.27167636, 141.99131703, 141.99131703, 842.40570988,\n",
       "       148.66289952, 148.66289952, 315.57406687,  84.25787124,\n",
       "        92.2659881 ,  92.2659881 ,  92.2659881 , 148.66215878,\n",
       "       148.66215878, 578.09843434, 504.66199275, 478.40849206,\n",
       "       478.40849206,  92.2659881 ,  92.2659881 ,  92.2659881 ,\n",
       "       578.09843434, 578.09843434,  92.2659881 ,  92.2659881 ,\n",
       "        92.2659881 , 478.40849206,  92.2659881 ,  92.2659881 ,\n",
       "        92.2659881 , 596.94069444, 430.52473611, 719.06332071,\n",
       "       378.75859127, 384.92972222, 405.98927885, 711.89189236,\n",
       "       707.65512731, 664.73467593, 551.90443866, 429.61180556,\n",
       "       464.76263889, 376.8937037 , 566.771     , 455.94967014,\n",
       "       448.59613426, 479.19293981, 380.93697338, 470.63      ,\n",
       "       410.72135031, 554.75431713, 631.02180556, 195.13491898,\n",
       "       621.17630556, 627.42796296,  15.1545463 , 112.6170101 ,\n",
       "       427.54611111, 378.16460106,   4.66297222, 523.97409323,\n",
       "        89.923     ,  10.30147222, 124.07150463, 202.75100505,\n",
       "        26.83478819,  54.31680556,  75.91666667, 190.66097222,\n",
       "        94.74114316, 251.37512153, 363.51030556,  28.442423  ,\n",
       "       156.11380556, 151.32628502, 210.96998016,  61.89465856,\n",
       "       244.59819444,  17.68572222, 258.565     ,  62.2325    ,\n",
       "       244.52265432, 197.72005208, 197.72005208, 590.64417259,\n",
       "       223.56349359, 247.71366162,  92.2659881 ,  52.40259259,\n",
       "       185.30969444, 125.37779762, 303.18520202,  82.28078704,\n",
       "        75.00896766, 483.59318627, 101.93527347,  80.64805556,\n",
       "        82.28078704, 106.89402778,  82.88633578, 118.60030882,\n",
       "       109.87316615, 101.93527347,  82.28078704, 100.95199561])"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 修改，使用groupby('TRANSPORT_TRACE')\n",
    "\n",
    "def matching(test_trace, port_data):\n",
    "    #train_data.sort_values(['loadingOrder', 'timestamp'], inplace = True)\n",
    "    train_group_1 = train_data.groupby('TRANSPORT_TRACE')\n",
    "    port_group = port_data.groupby('TRANS_NODE_NAME')\n",
    "    print('start matching!!!')\n",
    "    # 首先匹配长度为2的\n",
    "    for i in test_trace.index:\n",
    "        trace = test_trace.loc[i, 'TRANSPORT_TRACE'].split('-')\n",
    "        if test_label[i] == 0:\n",
    "            print(i)\n",
    "            cur_order = []\n",
    "            cur_label = []\n",
    "            seconds = []\n",
    "            dis_arr = []\n",
    "            index_arr = []\n",
    "            true_lat = port_group.get_group(trace[0]).iloc[0, 2]\n",
    "            true_lon = port_group.get_group(trace[0]).iloc[0, 1]\n",
    "            start_lat = test_trace.loc[i, 'latitude']\n",
    "            start_lon = test_trace.loc[i, 'longitude']\n",
    "            end_lat = port_group.get_group(trace[-1]).iloc[0, 2]\n",
    "            end_lon = port_group.get_group(trace[-1]).iloc[0, 1]\n",
    "            print(trace, start_lat, start_lon, end_lat, end_lon)\n",
    "            if trace == ['HUNGARY', 'HON']:\n",
    "                trace = ['HUNGARY', 'HONGKONG']\n",
    "            if trace == ['LTKLJ', 'DEHAM', 'HKHKG']:\n",
    "                trace = ['LTKLJ', 'DEHAM', 'HONG KONG_HONG KONG']\n",
    "            if trace == ['CNSHK', 'SGSIN', 'MTMLA', 'DZALG']:\n",
    "                trace = ['CNYTN', 'SGSIN', 'MTMLA', 'DZALG']\n",
    "            for name1, group1 in train_group_1:\n",
    "                # group.drop_duplicates(subset = ['longitude', 'latitude'], keep = 'first', inplace = True)\n",
    "                cur_trace = str(name1).split('-')\n",
    "                # 判断是否含有终点\n",
    "                if trace == cur_trace:\n",
    "                    cur_order.append(name1)\n",
    "                    train_group_2 = group1.groupby('loadingOrder')\n",
    "                    for name, group in train_group_2:\n",
    "                        #index.append(list(group_2.index)[0])\n",
    "                        start_time = group.loc[group.index[0], 'timestamp']\n",
    "                        start_index = group.index[0]\n",
    "                        judge = False\n",
    "                        for j in group.index:\n",
    "                            # 如果当前GPS数据与终点的经纬度相差在某个范围内，并且速度为0，则表示停靠在中间港口\n",
    "                            if abs(group.loc[j, 'longitude'] - start_lon) <= 0.2 and \\\n",
    "                                abs(group.loc[j, 'latitude'] - start_lat) <= 0.2 and \\\n",
    "                                not judge:\n",
    "                                start_time = group.loc[j, 'timestamp']\n",
    "                                start_index = j\n",
    "                                judge = True\n",
    "\n",
    "                        if judge:\n",
    "                            tmp = []\n",
    "                            for j in group.index[::-1]:\n",
    "                                if abs(group.loc[j, 'longitude'] - end_lon) < 0.2 and \\\n",
    "                                    abs(group.loc[j, 'latitude'] - end_lat) < 0.2 and \\\n",
    "                                    group.loc[j, 'speed'] > 0:\n",
    "                                    tmp.append(name)\n",
    "                                    tmp.append(start_index)\n",
    "                                    tmp.append(j)\n",
    "                                    tmp.append(test_trace.loc[i, 'loadingOrder'])\n",
    "                                    index_arr.append(tmp)\n",
    "                                    time_gap = abs((group.loc[j, 'timestamp'] - start_time).total_seconds() / 3600)\n",
    "                                    cur_label.append(time_gap)\n",
    "                                    break\n",
    "            if len(cur_label) < 3:\n",
    "                print('中间港到达')\n",
    "                for name1, group1 in train_group_1:\n",
    "                    # group.drop_duplicates(subset = ['longitude', 'latitude'], keep = 'first', inplace = True)\n",
    "                    cur_trace = str(name1).split('-')\n",
    "                    if trace[0] in cur_trace and trace[1] in cur_trace and \\\n",
    "                        cur_trace.index(trace[0]) < cur_trace.index(trace[1]):\n",
    "                        cur_order.append(name1)\n",
    "                        train_group_2 = group1.groupby('loadingOrder')\n",
    "                        for name, group in train_group_2:\n",
    "                            #index.append(list(group_2.index)[0])\n",
    "                            start_time = group.loc[group.index[0], 'timestamp']\n",
    "                            start_index = group.index[0]\n",
    "                            judge = False\n",
    "                            for j in group.index:\n",
    "                                # 如果当前GPS数据与终点的经纬度相差在某个范围内，并且速度为0，则表示停靠在中间港口\n",
    "                                if abs(group.loc[j, 'longitude'] - start_lon) <= 0.2 and \\\n",
    "                                    abs(group.loc[j, 'latitude'] - start_lat) <= 0.2 and \\\n",
    "                                    not judge:\n",
    "                                    start_time = group.loc[j, 'timestamp']\n",
    "                                    start_index = j\n",
    "                                    judge = True\n",
    "                            if judge:\n",
    "                                tmp = []\n",
    "                                for j in group.index[::-1]:\n",
    "                                    if abs(group.loc[j, 'longitude'] - end_lon) < 0.2 and \\\n",
    "                                        abs(group.loc[j, 'latitude'] - end_lat) < 0.2 and \\\n",
    "                                        group.loc[j, 'speed'] > 0:\n",
    "                                        tmp.append(name)\n",
    "                                        tmp.append(start_index)\n",
    "                                        tmp.append(j)\n",
    "                                        tmp.append(test_trace.loc[i, 'loadingOrder'])\n",
    "                                        index_arr.append(tmp)\n",
    "                                        time_gap = abs((group.loc[j, 'timestamp'] - start_time).total_seconds() / 3600)\n",
    "                                        cur_label.append(time_gap)\n",
    "                                        break\n",
    "                                tmp = []\n",
    "                                for j in group.index:\n",
    "                                    if abs(group.loc[j, 'longitude'] - end_lon) < 0.2 and \\\n",
    "                                        abs(group.loc[j, 'latitude'] - end_lat) < 0.2 and \\\n",
    "                                        group.loc[j, 'speed'] > 0:\n",
    "                                        tmp.append(name)\n",
    "                                        tmp.append(start_index)\n",
    "                                        tmp.append(j)\n",
    "                                        tmp.append(test_trace.loc[i, 'loadingOrder'])\n",
    "                                        index_arr.append(tmp)\n",
    "                                        time_gap = abs((group.loc[j, 'timestamp'] - start_time).total_seconds() / 3600)\n",
    "                                        cur_label.append(time_gap)\n",
    "                                        break \n",
    "            if len(cur_label) < 3:\n",
    "                print('匹配无路由的数据')\n",
    "                for name1, group1 in train_group_1:\n",
    "                    if name in cur_order:\n",
    "                        continue\n",
    "                    if len(cur_label) >= 10:\n",
    "                        break\n",
    "                    cur_trace = str(name1).split('-')\n",
    "                    if trace[-1] in cur_trace or len(cur_trace) < 2:\n",
    "                        train_group_2 = group1.groupby('loadingOrder')\n",
    "                        for name, group in train_group_2:\n",
    "                            start_time = group.loc[group.index[0], 'timestamp']\n",
    "                            start_index = group.index[0]\n",
    "                            end_index = group.index[-1]\n",
    "                            judge = False\n",
    "                            for j in group.index:\n",
    "                                # 如果当前GPS数据与终点的经纬度相差在某个范围内，并且速度为0，则表示停靠在中间港口\n",
    "                                if abs(group.loc[j, 'longitude'] - start_lon) <= 0.2 and \\\n",
    "                                    abs(group.loc[j, 'latitude'] - start_lat) <= 0.2 and \\\n",
    "                                    not judge:\n",
    "                                    start_time = group.loc[j, 'timestamp']\n",
    "                                    start_index = j\n",
    "                                    judge = True\n",
    "                            if judge:\n",
    "                                tmp = []\n",
    "                                for j in group.index[::-1]:\n",
    "                                    if abs(group.loc[j, 'longitude'] - end_lon) < 0.2 and \\\n",
    "                                        abs(group.loc[j, 'latitude'] - end_lat) < 0.2 and \\\n",
    "                                        group.loc[j, 'speed'] > 0:\n",
    "                                        tmp.append(name)\n",
    "                                        tmp.append(start_index)\n",
    "                                        tmp.append(j)\n",
    "                                        tmp.append(test_trace.loc[i, 'loadingOrder'])\n",
    "                                        index_arr.append(tmp)\n",
    "                                        time_gap = abs((group.loc[j, 'timestamp'] - start_time).total_seconds() / 3600)\n",
    "                                        cur_label.append(time_gap)\n",
    "                                        break\n",
    "                                tmp = []\n",
    "                                for j in group.index:\n",
    "                                    if abs(group.loc[j, 'longitude'] - end_lon) < 0.2 and \\\n",
    "                                        abs(group.loc[j, 'latitude'] - end_lat) < 0.2 and \\\n",
    "                                        group.loc[j, 'speed'] > 0:\n",
    "                                        tmp.append(name)\n",
    "                                        tmp.append(start_index)\n",
    "                                        tmp.append(j)\n",
    "                                        tmp.append(test_trace.loc[i, 'loadingOrder'])\n",
    "                                        index_arr.append(tmp)\n",
    "                                        time_gap = abs((group.loc[j, 'timestamp'] - start_time).total_seconds() / 3600)\n",
    "                                        cur_label.append(time_gap)\n",
    "                                        break \n",
    "                \n",
    "            print(\"cur_label:\", len(cur_label))\n",
    "            if(len(cur_label) > 0):\n",
    "                max_label = np.max(cur_label)\n",
    "                min_label = np.min(cur_label)\n",
    "                print(max_label, min_label)\n",
    "                temp_label = []\n",
    "                tmp_index = []\n",
    "                if max_label - min_label >= 100:\n",
    "                    for k in range(len(cur_label)):\n",
    "                        if abs(cur_label[k] - max_label) <= 10:\n",
    "                            continue\n",
    "                        else:\n",
    "                            temp_label.append(cur_label[k])\n",
    "                            tmp_index.append(index_arr[k])\n",
    "                if(len(temp_label) > 1):\n",
    "                    for k in range(len(tmp_index)):\n",
    "                        indexes.append(tmp_index[k])\n",
    "                    test_label[i] = (np.mean(np.array(temp_label)) + np.median(np.array(temp_label))) / 2\n",
    "                    #print(temp_label)\n",
    "                    print('Matching number: ', len(temp_label), test_label[i], \\\n",
    "                          distance((start_lat, start_lon), (end_lat, end_lon)).km)\n",
    "                    print('\\n')\n",
    "                else:\n",
    "                    for k in range(len(index_arr)):\n",
    "                        indexes.append(index_arr[k])\n",
    "                    test_label[i] = (np.mean(np.array(cur_label)) + np.median(np.array(cur_label))) / 2\n",
    "                    #print(cur_label)\n",
    "                    print('Matching number: ', len(cur_label), test_label[i], \\\n",
    "                          distance((start_lat, start_lon), (end_lat, end_lon)).km)\n",
    "                    print('\\n')\n",
    "                #print(temp_label)\n",
    "            else:\n",
    "                print('Not Found!!!')\n",
    "                print('\\n')\n",
    "                \n",
    "#train_3_port['timestamp'] = pd.to_datetime(train_3_port['timestamp'], infer_datetime_format=True)\n",
    "# Load the port table (columns 0, 1, 2 and 8 only) and force both coordinate columns to float.\n",
    "port_data = pd.read_csv(port_data_path, usecols = [0, 1, 2, 8])\n",
    "for coord_col in ('LONGITUDE', 'LATITUDE'):\n",
    "    # to_numeric parses the raw values; astype(float) then guarantees a float dtype\n",
    "    # even when every value happened to parse as an integer.\n",
    "    port_data[coord_col] = pd.to_numeric(port_data[coord_col]).astype(float)\n",
    "# Run the matcher defined above in this cell, then show test_label as the cell output.\n",
    "matching(test_trace, port_data)\n",
    "test_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6527"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the list of records whose data will be exported.\n",
    "# A candidate from `indexes` is dropped as a near-duplicate when an already-kept record\n",
    "# has the same element [0] and both its elements [1] and [2] differ from the\n",
    "# candidate's by less than 10. The first record is always kept (nothing to compare with).\n",
    "index = []\n",
    "for candidate in indexes:\n",
    "    is_near_duplicate = any(\n",
    "        candidate[0] == kept[0]\n",
    "        and abs(candidate[1] - kept[1]) < 10\n",
    "        and abs(candidate[2] - kept[2]) < 10\n",
    "        for kept in index\n",
    "    )\n",
    "    if not is_near_duplicate:\n",
    "        index.append(candidate)\n",
    "len(index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AE105181004450', 256325, 257785, 'QM149151037282']"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first kept record. The matcher appends, in order: the group name,\n",
    "# the start row label, the end row label, and the test loadingOrder it was matched for.\n",
    "# NOTE(review): the group name is presumably the training loadingOrder -- confirm in matching().\n",
    "index[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "500\n",
      "1000\n",
      "1500\n",
      "2000\n",
      "2500\n",
      "3000\n",
      "3500\n",
      "4000\n",
      "4500\n",
      "5000\n",
      "5500\n",
      "6000\n",
      "6500\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>test_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AE105181004450_0</td>\n",
       "      <td>2019-06-28 02:07:20+00:00</td>\n",
       "      <td>114.25000</td>\n",
       "      <td>22.562500</td>\n",
       "      <td>Q4197827697</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7280</td>\n",
       "      <td>QM149151037282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AE105181004450_0</td>\n",
       "      <td>2019-06-28 02:09:04+00:00</td>\n",
       "      <td>114.25000</td>\n",
       "      <td>22.562500</td>\n",
       "      <td>Q4197827697</td>\n",
       "      <td>11.0</td>\n",
       "      <td>7700</td>\n",
       "      <td>QM149151037282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AE105181004450_0</td>\n",
       "      <td>2019-06-28 02:11:30+00:00</td>\n",
       "      <td>114.25000</td>\n",
       "      <td>22.562500</td>\n",
       "      <td>Q4197827697</td>\n",
       "      <td>17.0</td>\n",
       "      <td>9060</td>\n",
       "      <td>QM149151037282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AE105181004450_0</td>\n",
       "      <td>2019-06-28 02:14:40+00:00</td>\n",
       "      <td>114.25000</td>\n",
       "      <td>22.562500</td>\n",
       "      <td>Q4197827697</td>\n",
       "      <td>20.0</td>\n",
       "      <td>9110</td>\n",
       "      <td>QM149151037282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AE105181004450_0</td>\n",
       "      <td>2019-06-28 02:16:51+00:00</td>\n",
       "      <td>114.31250</td>\n",
       "      <td>22.562500</td>\n",
       "      <td>Q4197827697</td>\n",
       "      <td>24.0</td>\n",
       "      <td>9110</td>\n",
       "      <td>QM149151037282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22017430</th>\n",
       "      <td>ZN895559314183_6526</td>\n",
       "      <td>2020-05-16 20:25:06+00:00</td>\n",
       "      <td>48.40625</td>\n",
       "      <td>29.140625</td>\n",
       "      <td>P6944255006</td>\n",
       "      <td>30.0</td>\n",
       "      <td>29780</td>\n",
       "      <td>QX924743867365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22017431</th>\n",
       "      <td>ZN895559314183_6526</td>\n",
       "      <td>2020-05-16 20:27:06+00:00</td>\n",
       "      <td>48.40625</td>\n",
       "      <td>29.156250</td>\n",
       "      <td>P6944255006</td>\n",
       "      <td>30.0</td>\n",
       "      <td>29840</td>\n",
       "      <td>QX924743867365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22017432</th>\n",
       "      <td>ZN895559314183_6526</td>\n",
       "      <td>2020-05-16 20:29:12+00:00</td>\n",
       "      <td>48.40625</td>\n",
       "      <td>29.156250</td>\n",
       "      <td>P6944255006</td>\n",
       "      <td>30.0</td>\n",
       "      <td>29750</td>\n",
       "      <td>QX924743867365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22017433</th>\n",
       "      <td>ZN895559314183_6526</td>\n",
       "      <td>2020-05-16 20:31:36+00:00</td>\n",
       "      <td>48.37500</td>\n",
       "      <td>29.156250</td>\n",
       "      <td>P6944255006</td>\n",
       "      <td>30.0</td>\n",
       "      <td>29780</td>\n",
       "      <td>QX924743867365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22017434</th>\n",
       "      <td>ZN895559314183_6526</td>\n",
       "      <td>2020-05-16 20:35:24+00:00</td>\n",
       "      <td>48.37500</td>\n",
       "      <td>29.171875</td>\n",
       "      <td>P6944255006</td>\n",
       "      <td>30.0</td>\n",
       "      <td>29810</td>\n",
       "      <td>QX924743867365</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>22017435 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 loadingOrder                 timestamp  longitude   latitude  \\\n",
       "0            AE105181004450_0 2019-06-28 02:07:20+00:00  114.25000  22.562500   \n",
       "1            AE105181004450_0 2019-06-28 02:09:04+00:00  114.25000  22.562500   \n",
       "2            AE105181004450_0 2019-06-28 02:11:30+00:00  114.25000  22.562500   \n",
       "3            AE105181004450_0 2019-06-28 02:14:40+00:00  114.25000  22.562500   \n",
       "4            AE105181004450_0 2019-06-28 02:16:51+00:00  114.31250  22.562500   \n",
       "...                       ...                       ...        ...        ...   \n",
       "22017430  ZN895559314183_6526 2020-05-16 20:25:06+00:00   48.40625  29.140625   \n",
       "22017431  ZN895559314183_6526 2020-05-16 20:27:06+00:00   48.40625  29.156250   \n",
       "22017432  ZN895559314183_6526 2020-05-16 20:29:12+00:00   48.40625  29.156250   \n",
       "22017433  ZN895559314183_6526 2020-05-16 20:31:36+00:00   48.37500  29.156250   \n",
       "22017434  ZN895559314183_6526 2020-05-16 20:35:24+00:00   48.37500  29.171875   \n",
       "\n",
       "           vesselMMSI  speed  direction      test_order  \n",
       "0         Q4197827697    7.0       7280  QM149151037282  \n",
       "1         Q4197827697   11.0       7700  QM149151037282  \n",
       "2         Q4197827697   17.0       9060  QM149151037282  \n",
       "3         Q4197827697   20.0       9110  QM149151037282  \n",
       "4         Q4197827697   24.0       9110  QM149151037282  \n",
       "...               ...    ...        ...             ...  \n",
       "22017430  P6944255006   30.0      29780  QX924743867365  \n",
       "22017431  P6944255006   30.0      29840  QX924743867365  \n",
       "22017432  P6944255006   30.0      29750  QX924743867365  \n",
       "22017433  P6944255006   30.0      29780  QX924743867365  \n",
       "22017434  P6944255006   30.0      29810  QX924743867365  \n",
       "\n",
       "[22017435 rows x 8 columns]"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the training data: one trajectory slice of train_data per record in `index`.\n",
    "# Each slice gets a unique loadingOrder ('<order>_<i>') and a test_order column holding\n",
    "# the test loadingOrder stored in the record.\n",
    "gc.collect()\n",
    "names = ['loadingOrder', 'timestamp', 'longitude', 'latitude', 'vesselMMSI', 'speed', 'direction']\n",
    "train_group = train_data.groupby('loadingOrder')\n",
    "segments = []\n",
    "for i in range(len(index)):\n",
    "    if i % 500 == 0:\n",
    "        print(i)\n",
    "    order = index[i][0]\n",
    "    start = int(index[i][1])\n",
    "    end = int(index[i][2])\n",
    "    # NOTE(review): .loc slices by label and includes both endpoints, so `end + 1` also\n",
    "    # selects the row labelled end + 1 when it exists. Kept as in the original; confirm intended.\n",
    "    segment = train_group.get_group(order).loc[start : end + 1, names]\n",
    "    # assign() returns a new frame, so nothing is written onto a slice of train_data\n",
    "    # (the original column assignment on the slice was a chained assignment).\n",
    "    segments.append(segment.assign(loadingOrder=order + '_' + str(i), test_order=index[i][3]))\n",
    "names.append('test_order')\n",
    "col_names = names\n",
    "if segments:\n",
    "    # A single concat keeps the source dtypes and avoids extending per-column Python\n",
    "    # lists one element at a time over every selected row.\n",
    "    filter_data = pd.concat(segments, ignore_index=True)[col_names]\n",
    "else:\n",
    "    # Empty `index`: still produce a frame with the expected columns.\n",
    "    filter_data = pd.DataFrame(columns=col_names)\n",
    "filter_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the assembled training frame to CSV, without the row index column.\n",
    "filter_data.to_csv('data_train_5.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>carrierName</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>onboardDate</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>temp_timestamp</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "      <th>diff_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6572</th>\n",
       "      <td>MI815639344195</td>\n",
       "      <td>2019-04-09 13:19:28</td>\n",
       "      <td>122.522538</td>\n",
       "      <td>29.730088</td>\n",
       "      <td>11.0</td>\n",
       "      <td>27670</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>D7828192902</td>\n",
       "      <td>2019-04-01 18:32:58</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>2019-04-09T13:19:28.000Z</td>\n",
       "      <td>-0.829882</td>\n",
       "      <td>0.331981</td>\n",
       "      <td>2160.166667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6632</th>\n",
       "      <td>MI815639344195</td>\n",
       "      <td>2019-04-13 02:54:48</td>\n",
       "      <td>137.412612</td>\n",
       "      <td>39.816323</td>\n",
       "      <td>32.0</td>\n",
       "      <td>5680</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>D7828192902</td>\n",
       "      <td>2019-04-01 18:32:58</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>2019-04-13T02:54:48.000Z</td>\n",
       "      <td>8.361436</td>\n",
       "      <td>12.015105</td>\n",
       "      <td>2419.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8239</th>\n",
       "      <td>GP579408164626</td>\n",
       "      <td>2019-05-12 23:07:28</td>\n",
       "      <td>103.814550</td>\n",
       "      <td>1.195083</td>\n",
       "      <td>6.0</td>\n",
       "      <td>17360</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>N9037513561</td>\n",
       "      <td>2019-05-08 01:06:18</td>\n",
       "      <td>CNSHK-ESALG</td>\n",
       "      <td>2019-05-12T23:07:28.000Z</td>\n",
       "      <td>-0.005450</td>\n",
       "      <td>0.158875</td>\n",
       "      <td>2481.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23983</th>\n",
       "      <td>IV490066651689</td>\n",
       "      <td>2019-09-27 11:31:18</td>\n",
       "      <td>108.006667</td>\n",
       "      <td>7.628333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1</td>\n",
       "      <td>OYSCFP</td>\n",
       "      <td>V7498939809</td>\n",
       "      <td>2019-09-25 09:27:08</td>\n",
       "      <td>CNSHK-PKQCT</td>\n",
       "      <td>2019-09-27T11:31:18.000Z</td>\n",
       "      <td>-7.381497</td>\n",
       "      <td>-2.821338</td>\n",
       "      <td>2092.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26610</th>\n",
       "      <td>JB123387157454</td>\n",
       "      <td>2020-01-10 14:10:28</td>\n",
       "      <td>110.869515</td>\n",
       "      <td>11.837765</td>\n",
       "      <td>33.0</td>\n",
       "      <td>20500</td>\n",
       "      <td>NWLGLX</td>\n",
       "      <td>R3419973056</td>\n",
       "      <td>2020-01-09 00:54:28</td>\n",
       "      <td>CNSHK-MYTPP</td>\n",
       "      <td>2020-01-10T14:10:28.000Z</td>\n",
       "      <td>-9.555013</td>\n",
       "      <td>-3.415565</td>\n",
       "      <td>2009.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31898</th>\n",
       "      <td>FA712454830300</td>\n",
       "      <td>2020-02-12 11:11:18</td>\n",
       "      <td>57.395283</td>\n",
       "      <td>-20.143960</td>\n",
       "      <td>11.0</td>\n",
       "      <td>9000</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>D2070531050</td>\n",
       "      <td>2020-01-28 04:52:18</td>\n",
       "      <td>CNSHK-MYTPP</td>\n",
       "      <td>2020-02-12T11:11:18.000Z</td>\n",
       "      <td>-42.201127</td>\n",
       "      <td>-56.968900</td>\n",
       "      <td>21828.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37422</th>\n",
       "      <td>ZE957090425955</td>\n",
       "      <td>2020-03-18 22:25:58</td>\n",
       "      <td>112.110460</td>\n",
       "      <td>14.356855</td>\n",
       "      <td>23.0</td>\n",
       "      <td>20500</td>\n",
       "      <td>OYSCFP</td>\n",
       "      <td>G1659327376</td>\n",
       "      <td>2020-03-17 03:57:28</td>\n",
       "      <td>CNSHK-SGSIN</td>\n",
       "      <td>2020-03-18T22:25:58.000Z</td>\n",
       "      <td>-7.989583</td>\n",
       "      <td>-1.915813</td>\n",
       "      <td>2481.833333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37423</th>\n",
       "      <td>WO664383377169</td>\n",
       "      <td>2020-03-18 22:25:58</td>\n",
       "      <td>112.110460</td>\n",
       "      <td>14.356855</td>\n",
       "      <td>23.0</td>\n",
       "      <td>20500</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>G1659327376</td>\n",
       "      <td>2020-03-17 03:57:28</td>\n",
       "      <td>CNSHK-SGSIN</td>\n",
       "      <td>2020-03-18T22:25:58.000Z</td>\n",
       "      <td>-7.989583</td>\n",
       "      <td>-1.915813</td>\n",
       "      <td>2481.833333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37424</th>\n",
       "      <td>UD485960146467</td>\n",
       "      <td>2020-03-18 22:25:58</td>\n",
       "      <td>112.110460</td>\n",
       "      <td>14.356855</td>\n",
       "      <td>23.0</td>\n",
       "      <td>20500</td>\n",
       "      <td>OYSCFP</td>\n",
       "      <td>G1659327376</td>\n",
       "      <td>2020-03-17 03:57:28</td>\n",
       "      <td>CNSHK-SGSIN</td>\n",
       "      <td>2020-03-18T22:25:58.000Z</td>\n",
       "      <td>-7.989583</td>\n",
       "      <td>-1.915813</td>\n",
       "      <td>2481.833333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         loadingOrder           timestamp   longitude   latitude  speed  \\\n",
       "6572   MI815639344195 2019-04-09 13:19:28  122.522538  29.730088   11.0   \n",
       "6632   MI815639344195 2019-04-13 02:54:48  137.412612  39.816323   32.0   \n",
       "8239   GP579408164626 2019-05-12 23:07:28  103.814550   1.195083    6.0   \n",
       "23983  IV490066651689 2019-09-27 11:31:18  108.006667   7.628333    0.0   \n",
       "26610  JB123387157454 2020-01-10 14:10:28  110.869515  11.837765   33.0   \n",
       "31898  FA712454830300 2020-02-12 11:11:18   57.395283 -20.143960   11.0   \n",
       "37422  ZE957090425955 2020-03-18 22:25:58  112.110460  14.356855   23.0   \n",
       "37423  WO664383377169 2020-03-18 22:25:58  112.110460  14.356855   23.0   \n",
       "37424  UD485960146467 2020-03-18 22:25:58  112.110460  14.356855   23.0   \n",
       "\n",
       "       direction carrierName   vesselMMSI         onboardDate TRANSPORT_TRACE  \\\n",
       "6572       27670      OIEQNT  D7828192902 2019-04-01 18:32:58     CNYTN-MXZLO   \n",
       "6632        5680      OIEQNT  D7828192902 2019-04-01 18:32:58     CNYTN-MXZLO   \n",
       "8239       17360      RWHZVZ  N9037513561 2019-05-08 01:06:18     CNSHK-ESALG   \n",
       "23983         -1      OYSCFP  V7498939809 2019-09-25 09:27:08     CNSHK-PKQCT   \n",
       "26610      20500      NWLGLX  R3419973056 2020-01-09 00:54:28     CNSHK-MYTPP   \n",
       "31898       9000      RWHZVZ  D2070531050 2020-01-28 04:52:18     CNSHK-MYTPP   \n",
       "37422      20500      OYSCFP  G1659327376 2020-03-17 03:57:28     CNSHK-SGSIN   \n",
       "37423      20500      OIEQNT  G1659327376 2020-03-17 03:57:28     CNSHK-SGSIN   \n",
       "37424      20500      OYSCFP  G1659327376 2020-03-17 03:57:28     CNSHK-SGSIN   \n",
       "\n",
       "                 temp_timestamp   diff_lat   diff_lon     diff_time  \n",
       "6572   2019-04-09T13:19:28.000Z  -0.829882   0.331981   2160.166667  \n",
       "6632   2019-04-13T02:54:48.000Z   8.361436  12.015105   2419.500000  \n",
       "8239   2019-05-12T23:07:28.000Z  -0.005450   0.158875   2481.000000  \n",
       "23983  2019-09-27T11:31:18.000Z  -7.381497  -2.821338   2092.000000  \n",
       "26610  2020-01-10T14:10:28.000Z  -9.555013  -3.415565   2009.000000  \n",
       "31898  2020-02-12T11:11:18.000Z -42.201127 -56.968900  21828.500000  \n",
       "37422  2020-03-18T22:25:58.000Z  -7.989583  -1.915813   2481.833333  \n",
       "37423  2020-03-18T22:25:58.000Z  -7.989583  -1.915813   2481.833333  \n",
       "37424  2020-03-18T22:25:58.000Z  -7.989583  -1.915813   2481.833333  "
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Analyse the test data: step between consecutive records within each loadingOrder.\n",
    "\n",
    "order_groups = test_data.groupby('loadingOrder')\n",
    "lat_step = order_groups['latitude'].diff(1)\n",
    "lon_step = order_groups['longitude'].diff(1)\n",
    "# Timestamp step converted from seconds to minutes.\n",
    "minutes_step = order_groups['timestamp'].diff(1).dt.total_seconds() / 60\n",
    "test_data['diff_lat'] = lat_step\n",
    "test_data['diff_lon'] = lon_step\n",
    "test_data['diff_time'] = minutes_step\n",
    "# The first record of every order has no predecessor, so its diffs are NaN.\n",
    "# NOTE(review): this fills NaN with 0 in EVERY column of test_data, in place, not only\n",
    "# in the three diff columns -- confirm that is intended.\n",
    "test_data.fillna(0, inplace = True)\n",
    "# Keep the records that follow a gap of more than 2000 minutes (in either direction).\n",
    "test_back = test_data[test_data['diff_time'].abs() > 2000]\n",
    "test_back"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['BY832566000088' 'EN802039631720' 'GS673295248432' 'GU421669106132'\n",
      " 'UK693787941550' 'YX405201296762']\n",
      "2471\n"
     ]
    },
    {
     "ename": "ImportError",
     "evalue": "The descartes package is required for plotting polygons in geopandas. You can install it using 'conda install -c conda-forge descartes' or 'pip install descartes'.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36m_plot_polygon_collection\u001b[0;34m(ax, geoms, values, color, cmap, vmin, vmax, **kwargs)\u001b[0m\n\u001b[1;32m    119\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 120\u001b[0;31m         \u001b[0;32mfrom\u001b[0m \u001b[0mdescartes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpatch\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mPolygonPatch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    121\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'descartes'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-11-ba7aa9201cb8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     27\u001b[0m \u001b[0mfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0max\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubplots\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m15\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     28\u001b[0m \u001b[0mworld\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'naturalearth_lowres'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mworld\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     30\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlng\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlat\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlng\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlat\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/geodataframe.py\u001b[0m in \u001b[0;36mplot\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    919\u001b[0m         \u001b[0;32mfrom\u001b[0m \u001b[0mthere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    920\u001b[0m         \"\"\"\n\u001b[0;32m--> 921\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mplot_dataframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    922\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    923\u001b[0m     \u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplot_dataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36mplot_dataframe\u001b[0;34m(df, column, cmap, color, ax, cax, categorical, legend, scheme, k, vmin, vmax, markersize, figsize, legend_kwds, categories, classification_kwds, missing_kwds, aspect, **style_kwds)\u001b[0m\n\u001b[1;32m    621\u001b[0m             \u001b[0mmarkersize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmarkersize\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    622\u001b[0m             \u001b[0maspect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maspect\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 623\u001b[0;31m             \u001b[0;34m**\u001b[0m\u001b[0mstyle_kwds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    624\u001b[0m         )\n\u001b[1;32m    625\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36mplot_series\u001b[0;34m(s, cmap, color, ax, figsize, aspect, **style_kwds)\u001b[0m\n\u001b[1;32m    412\u001b[0m         \u001b[0mvalues_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpoly_idx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcmap\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    413\u001b[0m         _plot_polygon_collection(\n\u001b[0;32m--> 414\u001b[0;31m             \u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfacecolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfacecolor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmap\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcmap\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mstyle_kwds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    415\u001b[0m         )\n\u001b[1;32m    416\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36m_plot_polygon_collection\u001b[0;34m(ax, geoms, values, color, cmap, vmin, vmax, **kwargs)\u001b[0m\n\u001b[1;32m    121\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    122\u001b[0m         raise ImportError(\n\u001b[0;32m--> 123\u001b[0;31m             \u001b[0;34m\"The descartes package is required for plotting polygons in geopandas. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    124\u001b[0m             \u001b[0;34m\"You can install it using 'conda install -c conda-forge descartes' or \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    125\u001b[0m             \u001b[0;34m\"'pip install descartes'.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mImportError\u001b[0m: The descartes package is required for plotting polygons in geopandas. You can install it using 'conda install -c conda-forge descartes' or 'pip install descartes'."
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1sAAANSCAYAAACTM9TNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHbZJREFUeJzt3V+orfdd5/HPt4lRqLWCOQOSPybg6dRMEOJsMh16YaWdIclFclMkgaKV0NxMFLUIEaVKvLJlKAjxTwZLVbAx9kIPEsmFVhQxJafUCSYlcIhOc4iQWGNuio2Z+c3F3tPZ3dnJXjlZn5Ozs18vOLCetX577e/Fj33O+zzPXs+stQIAAMB2veOtHgAAAODtSGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQMGRsTUzn5mZ52fm717j9ZmZX5uZczPzxMz80PbHBAAAOF42ObP12SS3vM7rtyY5vffnniS/8ebHAgAAON6OjK211l8m+efXWXJHkt9dux5L8t0z873bGhAAAOA4unwL73FVkmf3HZ/fe+4fDy6cmXuye/Yr73znO//je9/73i18ewAAgI4vfelL/7TWOnUhX7uN2JpDnluHLVxrPZjkwSTZ2dlZZ8+e3cK3BwAA6JiZ/3WhX7uNTyM8n+SafcdXJ3luC+8LAABwbG0jts4k+bG9TyV8X5KX1lqvuoQQAADgJDnyMsKZ+VySDyS5cmbOJ/mlJN+WJGut30zySJLbkpxL8vUkP9EaFgAA4Lg4MrbWWncd8fpK8t+2NhEAAMDbwDYuIwQAAOAAsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFCwUWzNzC0z8/TM
nJuZ+w55/dqZ+cLMfHlmnpiZ27Y/KgAAwPFxZGzNzGVJHkhya5Ibktw1MzccWPaLSR5ea92U5M4kv77tQQEAAI6TTc5s3Zzk3FrrmbXWy0keSnLHgTUryXftPX53kue2NyIAAMDxs0lsXZXk2X3H5/ee2++Xk3xkZs4neSTJTx72RjNzz8ycnZmzL7zwwgWMCwAAcDxsEltzyHPrwPFdST671ro6yW1Jfm9mXvXea60H11o7a62dU6dOvfFpAQAAjolNYut8kmv2HV+dV18meHeSh5NkrfU3Sb4jyZXbGBAAAOA42iS2Hk9yemaun5krsvsBGGcOrPlqkg8mycz8QHZjy3WCAADAiXVkbK21Xklyb5JHk3wlu586+OTM3D8zt+8t+3iSj83M/0zyuSQfXWsdvNQQAADgxLh8k0VrrUey+8EX+5/7xL7HTyV5/3ZHAwAAOL42uqkxAAAAb4zYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKNgotmbmlpl5embOzcx9r7HmR2fmqZl5cmZ+f7tjAgAAHC+XH7VgZi5L8kCS/5LkfJLHZ+bMWuupfWtOJ/n5JO9fa704M/+uNTAAAMBxsMmZrZuTnFtrPbPWejnJQ0nuOLDmY0keWGu9mCRrree3OyYAAMDxsklsXZXk2X3H5/ee2+89Sd4zM389M4/NzC3bGhAAAOA4OvIywiRzyHPrkPc5neQDSa5O8lczc+Na61++5Y1m7klyT5Jce+21b3hYAACA42KTM1vnk1yz7/jqJM8dsuaP11r/ttb6+yRPZze+vsVa68G11s5aa+fUqVMXOjMAAMAlb5PYejzJ6Zm5fmauSHJnkjMH1vxRkh9Jkpm5MruXFT6zzUEBAACOkyNja631
SpJ7kzya5CtJHl5rPTkz98/M7XvLHk3ytZl5KskXkvzcWutrraEBAAAudbPWwV+/ujh2dnbW2bNn35LvDQAAsImZ+dJaa+dCvnajmxoDAADwxogtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAArEFAABQILYAAAAKxBYAAECB2AIAACgQWwAAAAViCwAAoEBsAQAAFIgtAACAgo1ia2ZumZmnZ+bczNz3Ous+PDNrZna2NyIAAMDxc2RszcxlSR5IcmuSG5LcNTM3HLLuXUl+KskXtz0kAADAcbPJma2bk5xbaz2z1no5yUNJ7jhk3a8k+WSSf93ifAAAAMfSJrF1VZJn9x2f33vum2bmpiTXrLX+ZIuzAQAAHFubxNYc8tz65osz70jy6SQfP/KNZu6ZmbMzc/aFF17YfEoAAIBjZpPYOp/kmn3HVyd5bt/xu5LcmOQvZuYfkrwvyZnDPiRjrfXgWmtnrbVz6tSpC58aAADgErdJbD2e5PTMXD8zVyS5M8mZ//fiWuultdaVa63r1lrXJXksye1rrbOViQEAAI6BI2NrrfVKknuTPJrkK0keXms9OTP3z8zt7QEBAACOo8s3WbTWeiTJIwee+8RrrP3Amx8LAADgeNvopsYAAAC8MWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACg
QGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUbBRbM3PLzDw9M+dm5r5DXv/ZmXlqZp6YmT+bme/b/qgAAADHx5GxNTOXJXkgya1Jbkhy18zccGDZl5PsrLV+MMnnk3xy24MCAAAcJ5uc2bo5ybm11jNrrZeTPJTkjv0L1lpfWGt9fe/wsSRXb3dMAACA42WT2LoqybP7js/vPfda7k7yp4e9MDP3zMzZmTn7wgsvbD4lAADAMbNJbM0hz61DF858JMlOkk8d9vpa68G11s5aa+fUqVObTwkAAHDMXL7BmvNJrtl3fHWS5w4umpkPJfmFJD+81vrGdsYDAAA4njY5s/V4ktMzc/3MXJHkziRn9i+YmZuS/FaS29daz29/TAAAgOPlyNhaa72S5N4kjyb5SpKH11pPzsz9M3P73rJPJfnOJH84M387M2de4+0AAABOhE0uI8xa65Ekjxx47hP7Hn9oy3MBAAAcaxvd1BgAAIA3RmwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAA
QIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUiC0AAIACsQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACgQGwBAAAUbBRbM3PLzDw9M+dm5r5DXv/2mfmDvde/ODPXbXtQAACA4+TI2JqZy5I8kOTWJDckuWtmbjiw7O4kL661vj/Jp5P86rYHBQAAOE42ObN1c5Jza61n1lovJ3koyR0H1tyR5Hf2Hn8+yQdnZrY3JgAAwPFy+QZrrkry7L7j80n+02utWWu9MjMvJfmeJP+0f9HM3JPknr3Db8zM313I0LBlV+bAXoW3iL3IpcJe5FJgH3Kp+PcX+oWbxNZhZ6jWBazJWuvBJA8mycycXWvtbPD9ocpe5FJhL3KpsBe5FNiHXCpm5uyFfu0mlxGeT3LNvuOrkzz3Wmtm5vIk707yzxc6FAAAwHG3SWw9nuT0zFw/M1ckuTPJmQNrziT58b3HH07y52utV53ZAgAAOCmOvIxw73ew7k3yaJLLknxmrfXkzNyf5Oxa60yS307yezNzLrtntO7c4Hs/+Cbmhm2yF7lU2ItcKuxFLgX2IZeKC96L4wQUAADA9m10U2MAAADeGLEFAABQUI+tmbllZp6emXMzc98hr3/7zPzB3utfnJnr2jNxMm2wF392Zp6amSdm5s9m5vveijl5eztqH+5b9+GZWTPjY4+p2GQvzsyP7v1cfHJmfv9iz8jJsMHfz9fOzBdm5st7f0ff9lbMydvbzHxmZp5/rfsAz65f29unT8zMD23yvtXYmpnLkjyQ5NYkNyS5a2ZuOLDs7iQvrrW+P8mnk/xqcyZOpg334peT7Ky1fjDJ55N88uJOydvdhvswM/OuJD+V5IsXd0JOik324sycTvLzSd6/1voPSX76og/K296GPxd/McnDa62bsvshbL9+cafkhPhsklte5/Vbk5ze+3NPkt/Y5E3bZ7ZuTnJurfXMWuvlJA8luePAmjuS/M7e488n+eDMHHaTZHgzjtyLa60vrLW+vnf4WHbvKQfbtMnPxCT5lezG/r9ezOE4UTbZix9L8sBa68UkWWs9f5Fn5GTYZC+uJN+19/jdefX9XuFNW2v9ZV7/PsF3JPndteuxJN89M9971Pu2Y+uqJM/uOz6/99yha9ZaryR5Kcn3lOfi5NlkL+53d5I/rU7ESXTkPpyZm5Jcs9b6k4s5GCfOJj8T35PkPTPz1zPz2My83v/4woXaZC/+cpKPzMz5JI8k+cmLMxp8izf6b8kkG9xn60067AzVwc+a32QNvFkb77OZ+UiSnSQ/XJ2Ik+h19+HMvCO7l1N/9GINxIm1yc/Ey7N7ucwHsnum/69m5sa11r+UZ+Nk2WQv3pXks2ut/z4z/zm793a9ca31f/rjwTddULO0z2ydT3LNvuOr8+pTv99cMzOXZ/f08OudwoMLsclezMx8KMkvJLl9rfWNizQbJ8dR+/BdSW5M8hcz8w9J3pfkjA/JoGDTv5//eK31b2utv0/ydHbjC7Zpk714d5KHk2St9TdJviPJlRdlOvj/Nvq35EHt2Ho8yemZuX5mrsjuLzWeObDmTJIf33v84SR/vtxpme07ci/uXb71W9kNLb+bQMPr7sO11ktrrSvXWtetta7L7u8O3r7W
OvvWjMvb2CZ/P/9Rkh9Jkpm5MruXFT5zUafkJNhkL341yQeTZGZ+ILux9cJFnRJ29+WP7X0q4fuSvLTW+sejvqh6GeFa65WZuTfJo0kuS/KZtdaTM3N/krNrrTNJfju7p4PPZfeM1p3NmTiZNtyLn0rynUn+cO8zWr661rr9LRuat50N9yHUbbgXH03yX2fmqST/O8nPrbW+9tZNzdvRhnvx40n+x8z8THYv2/qo/5hn22bmc9m9bPrKvd8P/KUk35Yka63fzO7vC96W5FySryf5iY3e114FAADYvvpNjQEAAE4isQUAAFAgtgAAAArEFgAAQIHYAgAAKBBbAAAABWILAACg4P8CYd635xkzK8kAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fb4706b1208>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Cell-local imports, gathered up front so the cell's dependencies are visible at a glance.\n",
    "import os\n",
    "\n",
    "import geopandas as gp\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from shapely.geometry import Point\n",
    "%matplotlib inline\n",
    "\n",
    "# Select one route and list its distinct loading orders in first-seen order.\n",
    "train_group = train_data.groupby('TRANSPORT_TRACE')\n",
    "group = train_group.get_group('LTKLJ-DEHAM-HONG KONG_HONG KONG')\n",
    "Order = np.array(group['loadingOrder'].drop_duplicates(keep='first'))\n",
    "print(Order)\n",
    "\n",
    "# GPS track (longitude / latitude arrays) of the first loading order on that route.\n",
    "example = group.groupby('loadingOrder').get_group(Order[0])\n",
    "lng, lat = (np.array(example[column]) for column in ('longitude', 'latitude'))\n",
    "print(len(lng))\n",
    "\n",
    "# Disabled alternative: plot the raw points as a GeoSeries without the map backdrop.\n",
    "#pts = gp.GeoSeries([Point(x, y) for x, y in zip(lng, lat)])\n",
    "#pts.plot()\n",
    "\n",
    "# Reference notes: a port sequence followed by four coordinates.\n",
    "# NOTE(review): the numbers look like two (lat, lon) pairs per route - confirm what they denote.\n",
    "# ['CNSHK', 'GRPIR', 'ITSPE', 'ITGOA'] 5.87038 84.500385 44.40565 8.946256\n",
    "# ['CNYTN', 'HKHKG', 'NZAKL', 'AUBNE'] 11.834633 128.983183 -27.383267 153.164629\n",
    "# ['CNSHK', 'CNNSA', 'MYTPP', 'SGSIN', 'BJCOO', 'CIABJ'] -5.7323 84.065217 5.305333 -4.0041199999999995\n",
    "# ['CNNSA', 'MYTPP', 'SGSIN', 'ZACPT', 'CGPNR', 'GALBV', 'CMKBI'] -22.893365 14.49949 2.939002 9.906216\n",
    "\n",
    "# World map as the backdrop.\n",
    "# NOTE(review): gp.datasets is deprecated in recent geopandas releases - confirm the installed version.\n",
    "fig, ax = plt.subplots(figsize=(30, 15))\n",
    "world = gp.read_file(gp.datasets.get_path('naturalearth_lowres'))\n",
    "world.plot(ax=ax)\n",
    "print(lng[0], lat[0])\n",
    "print(lng[-1], lat[-1])\n",
    "#cities.plot(ax=ax, marker='o', color='red', markersize=5)\n",
    "\n",
    "# Full trajectory of the loading order.\n",
    "ax.scatter(lng, lat, s=20, color='g', marker='x')\n",
    "# Two fixed markers; their coordinates equal the numbers listed for the last route in the notes above.\n",
    "# NOTE(review): both are labelled 'start' and belong to a different route than the one plotted - confirm intent.\n",
    "ax.scatter(14.49949, -22.893365, s=100, color='g', marker='*', label='start')\n",
    "ax.scatter(9.906216, 2.939002, s=100, color='b', marker='*', label='start')\n",
    "# First (blue) and last (red) GPS fix of the trajectory.\n",
    "# NOTE(review): labels are set but no legend is drawn, so they never appear on the figure.\n",
    "ax.scatter(lng[0], lat[0], s=50, color='b', marker='*', label='start')\n",
    "ax.scatter(lng[-1], lat[-1], s=50, color='r', marker='*', label='end')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>diff_time</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>29294183</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-03-05 21:39:00+00:00</td>\n",
       "      <td>121.12500</td>\n",
       "      <td>25.562500</td>\n",
       "      <td>V1577807785</td>\n",
       "      <td>1.0</td>\n",
       "      <td>400</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29294184</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-03-21 09:30:04+00:00</td>\n",
       "      <td>-171.87500</td>\n",
       "      <td>48.343750</td>\n",
       "      <td>V1577807785</td>\n",
       "      <td>32.0</td>\n",
       "      <td>9390</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>22311.066667</td>\n",
       "      <td>22.781250</td>\n",
       "      <td>-293.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29294185</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-03-23 07:44:34+00:00</td>\n",
       "      <td>-151.75000</td>\n",
       "      <td>44.875000</td>\n",
       "      <td>V1577807785</td>\n",
       "      <td>36.0</td>\n",
       "      <td>11210</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>2774.500000</td>\n",
       "      <td>-3.468750</td>\n",
       "      <td>20.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29294186</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-03-23 08:39:46+00:00</td>\n",
       "      <td>-151.37500</td>\n",
       "      <td>44.750000</td>\n",
       "      <td>V1577807785</td>\n",
       "      <td>36.0</td>\n",
       "      <td>11270</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>55.200000</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>0.3750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29294187</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-04-22 01:10:58+00:00</td>\n",
       "      <td>-76.81250</td>\n",
       "      <td>17.984375</td>\n",
       "      <td>S6489662802</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13000</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>42751.200000</td>\n",
       "      <td>-26.765625</td>\n",
       "      <td>74.5625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29295422</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-04-26 15:30:59+00:00</td>\n",
       "      <td>-61.53125</td>\n",
       "      <td>10.648438</td>\n",
       "      <td>S6489662802</td>\n",
       "      <td>8.0</td>\n",
       "      <td>5820</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>4.033333</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29295423</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-04-26 15:34:48+00:00</td>\n",
       "      <td>-61.53125</td>\n",
       "      <td>10.648438</td>\n",
       "      <td>S6489662802</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8350</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>3.816667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29295424</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-04-26 15:39:08+00:00</td>\n",
       "      <td>-61.53125</td>\n",
       "      <td>10.648438</td>\n",
       "      <td>S6489662802</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5990</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>4.333333</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29295425</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-04-26 15:43:08+00:00</td>\n",
       "      <td>-61.53125</td>\n",
       "      <td>10.648438</td>\n",
       "      <td>S6489662802</td>\n",
       "      <td>1.0</td>\n",
       "      <td>33110</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29295426</th>\n",
       "      <td>UK693787941550</td>\n",
       "      <td>2020-04-26 15:46:47+00:00</td>\n",
       "      <td>-61.53125</td>\n",
       "      <td>10.656250</td>\n",
       "      <td>S6489662802</td>\n",
       "      <td>1.0</td>\n",
       "      <td>35430</td>\n",
       "      <td>CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...</td>\n",
       "      <td>3.650000</td>\n",
       "      <td>0.007812</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1244 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            loadingOrder                 timestamp  longitude   latitude  \\\n",
       "29294183  UK693787941550 2020-03-05 21:39:00+00:00  121.12500  25.562500   \n",
       "29294184  UK693787941550 2020-03-21 09:30:04+00:00 -171.87500  48.343750   \n",
       "29294185  UK693787941550 2020-03-23 07:44:34+00:00 -151.75000  44.875000   \n",
       "29294186  UK693787941550 2020-03-23 08:39:46+00:00 -151.37500  44.750000   \n",
       "29294187  UK693787941550 2020-04-22 01:10:58+00:00  -76.81250  17.984375   \n",
       "...                  ...                       ...        ...        ...   \n",
       "29295422  UK693787941550 2020-04-26 15:30:59+00:00  -61.53125  10.648438   \n",
       "29295423  UK693787941550 2020-04-26 15:34:48+00:00  -61.53125  10.648438   \n",
       "29295424  UK693787941550 2020-04-26 15:39:08+00:00  -61.53125  10.648438   \n",
       "29295425  UK693787941550 2020-04-26 15:43:08+00:00  -61.53125  10.648438   \n",
       "29295426  UK693787941550 2020-04-26 15:46:47+00:00  -61.53125  10.656250   \n",
       "\n",
       "           vesselMMSI  speed  direction  \\\n",
       "29294183  V1577807785    1.0        400   \n",
       "29294184  V1577807785   32.0       9390   \n",
       "29294185  V1577807785   36.0      11210   \n",
       "29294186  V1577807785   36.0      11270   \n",
       "29294187  S6489662802    0.0      13000   \n",
       "...               ...    ...        ...   \n",
       "29295422  S6489662802    8.0       5820   \n",
       "29295423  S6489662802    5.0       8350   \n",
       "29295424  S6489662802    3.0       5990   \n",
       "29295425  S6489662802    1.0      33110   \n",
       "29295426  S6489662802    1.0      35430   \n",
       "\n",
       "                                            TRANSPORT_TRACE     diff_time  \\\n",
       "29294183  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...      0.000000   \n",
       "29294184  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...  22311.066667   \n",
       "29294185  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...   2774.500000   \n",
       "29294186  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...     55.200000   \n",
       "29294187  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...  42751.200000   \n",
       "...                                                     ...           ...   \n",
       "29295422  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...      4.033333   \n",
       "29295423  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...      3.816667   \n",
       "29295424  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...      4.333333   \n",
       "29295425  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...      4.000000   \n",
       "29295426  CNSHK-HKHKG-TWKHH-CNNBG-CNSHA-CNTAO-KRPUS-MXZL...      3.650000   \n",
       "\n",
       "           diff_lat  diff_lon  \n",
       "29294183   0.000000    0.0000  \n",
       "29294184  22.781250 -293.0000  \n",
       "29294185  -3.468750   20.1250  \n",
       "29294186  -0.125000    0.3750  \n",
       "29294187 -26.765625   74.5625  \n",
       "...             ...       ...  \n",
       "29295422   0.000000    0.0000  \n",
       "29295423   0.000000    0.0000  \n",
       "29295424   0.000000    0.0000  \n",
       "29295425   0.000000    0.0000  \n",
       "29295426   0.007812    0.0000  \n",
       "\n",
       "[1244 rows x 11 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display every row of `group` that belongs to the loading order stored at index 4 of `Order`.\n",
    "(\n",
    "    group\n",
    "    .groupby('loadingOrder')\n",
    "    .get_group(Order[4])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>carrierName</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>onboardDate</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>temp_timestamp</th>\n",
       "      <th>diff_time</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17827</th>\n",
       "      <td>HJ790123244299</td>\n",
       "      <td>2019-06-20 20:22:08+00:00</td>\n",
       "      <td>105.741667</td>\n",
       "      <td>3.906667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>JCMFTA</td>\n",
       "      <td>M1075899155</td>\n",
       "      <td>2019-06-17 13:24:00</td>\n",
       "      <td>CNYTN-MATNG</td>\n",
       "      <td>2019-06-20T20:22:08.000Z</td>\n",
       "      <td>2281.000000</td>\n",
       "      <td>-6.481666</td>\n",
       "      <td>-5.141666</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21755</th>\n",
       "      <td>JH101436980795</td>\n",
       "      <td>2020-03-18 22:25:58+00:00</td>\n",
       "      <td>112.110460</td>\n",
       "      <td>14.356855</td>\n",
       "      <td>23.0</td>\n",
       "      <td>20500.0</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>G1659327376</td>\n",
       "      <td>2020-03-17 03:57:00</td>\n",
       "      <td>CNSHK-SGSIN</td>\n",
       "      <td>2020-03-18T22:25:58.000Z</td>\n",
       "      <td>2481.833333</td>\n",
       "      <td>-7.989583</td>\n",
       "      <td>-1.915813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21822</th>\n",
       "      <td>JK944314963943</td>\n",
       "      <td>2019-08-25 13:46:28+00:00</td>\n",
       "      <td>107.558565</td>\n",
       "      <td>6.629025</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>U9574815950</td>\n",
       "      <td>2019-08-23 08:36:00</td>\n",
       "      <td>CNYTN-MTMLA</td>\n",
       "      <td>2019-08-25T13:46:28.000Z</td>\n",
       "      <td>2243.000000</td>\n",
       "      <td>-11.967172</td>\n",
       "      <td>-5.280282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26955</th>\n",
       "      <td>LS475777921461</td>\n",
       "      <td>2019-06-19 19:53:48+00:00</td>\n",
       "      <td>109.721667</td>\n",
       "      <td>9.883333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>G9393704581</td>\n",
       "      <td>2019-06-17 08:56:00</td>\n",
       "      <td>CNSHK-ZADUR</td>\n",
       "      <td>2019-06-19T19:53:48.000Z</td>\n",
       "      <td>2001.000000</td>\n",
       "      <td>-6.935000</td>\n",
       "      <td>-3.445000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28108</th>\n",
       "      <td>MU690927219794</td>\n",
       "      <td>2019-04-02 13:40:48+00:00</td>\n",
       "      <td>123.619297</td>\n",
       "      <td>18.074185</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>UQCRKD</td>\n",
       "      <td>M3288411589</td>\n",
       "      <td>2019-04-01 03:44:00</td>\n",
       "      <td>CNSHK-CLVAP</td>\n",
       "      <td>2019-04-02T13:40:48.000Z</td>\n",
       "      <td>2036.000000</td>\n",
       "      <td>-3.802782</td>\n",
       "      <td>8.599830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53837</th>\n",
       "      <td>YV342585220795</td>\n",
       "      <td>2019-04-09 13:19:28+00:00</td>\n",
       "      <td>122.522538</td>\n",
       "      <td>29.730088</td>\n",
       "      <td>11.0</td>\n",
       "      <td>27670.0</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>D7828192902</td>\n",
       "      <td>2019-04-01 18:32:00</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>2019-04-09T13:19:28.000Z</td>\n",
       "      <td>2160.166667</td>\n",
       "      <td>-0.829882</td>\n",
       "      <td>0.331981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53856</th>\n",
       "      <td>YV342585220795</td>\n",
       "      <td>2019-04-13 02:54:48+00:00</td>\n",
       "      <td>137.412612</td>\n",
       "      <td>39.816323</td>\n",
       "      <td>32.0</td>\n",
       "      <td>5680.0</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>D7828192902</td>\n",
       "      <td>2019-04-01 18:32:00</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>2019-04-13T02:54:48.000Z</td>\n",
       "      <td>2419.500000</td>\n",
       "      <td>8.361436</td>\n",
       "      <td>12.015105</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         loadingOrder                 timestamp   longitude   latitude  speed  \\\n",
       "17827  HJ790123244299 2019-06-20 20:22:08+00:00  105.741667   3.906667    0.0   \n",
       "21755  JH101436980795 2020-03-18 22:25:58+00:00  112.110460  14.356855   23.0   \n",
       "21822  JK944314963943 2019-08-25 13:46:28+00:00  107.558565   6.629025    0.0   \n",
       "26955  LS475777921461 2019-06-19 19:53:48+00:00  109.721667   9.883333    0.0   \n",
       "28108  MU690927219794 2019-04-02 13:40:48+00:00  123.619297  18.074185    0.0   \n",
       "53837  YV342585220795 2019-04-09 13:19:28+00:00  122.522538  29.730088   11.0   \n",
       "53856  YV342585220795 2019-04-13 02:54:48+00:00  137.412612  39.816323   32.0   \n",
       "\n",
       "       direction carrierName   vesselMMSI         onboardDate TRANSPORT_TRACE  \\\n",
       "17827       -1.0      JCMFTA  M1075899155 2019-06-17 13:24:00     CNYTN-MATNG   \n",
       "21755    20500.0      OIEQNT  G1659327376 2020-03-17 03:57:00     CNSHK-SGSIN   \n",
       "21822       -1.0      RWHZVZ  U9574815950 2019-08-23 08:36:00     CNYTN-MTMLA   \n",
       "26955       -1.0      RWHZVZ  G9393704581 2019-06-17 08:56:00     CNSHK-ZADUR   \n",
       "28108       -1.0      UQCRKD  M3288411589 2019-04-01 03:44:00     CNSHK-CLVAP   \n",
       "53837    27670.0      OIEQNT  D7828192902 2019-04-01 18:32:00     CNYTN-MXZLO   \n",
       "53856     5680.0      OIEQNT  D7828192902 2019-04-01 18:32:00     CNYTN-MXZLO   \n",
       "\n",
       "                 temp_timestamp    diff_time   diff_lat   diff_lon  \n",
       "17827  2019-06-20T20:22:08.000Z  2281.000000  -6.481666  -5.141666  \n",
       "21755  2020-03-18T22:25:58.000Z  2481.833333  -7.989583  -1.915813  \n",
       "21822  2019-08-25T13:46:28.000Z  2243.000000 -11.967172  -5.280282  \n",
       "26955  2019-06-19T19:53:48.000Z  2001.000000  -6.935000  -3.445000  \n",
       "28108  2019-04-02T13:40:48.000Z  2036.000000  -3.802782   8.599830  \n",
       "53837  2019-04-09T13:19:28.000Z  2160.166667  -0.829882   0.331981  \n",
       "53856  2019-04-13T02:54:48.000Z  2419.500000   8.361436  12.015105  "
      ]
     },
     "execution_count": 159,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-loading-order deltas between consecutive records, written onto test_data in place.\n",
    "# diff_time is expressed in minutes (seconds / 60); diff_lat and diff_lon are raw coordinate differences.\n",
    "test_data['diff_time'] = (\n",
    "    test_data.groupby('loadingOrder')['timestamp']\n",
    "    .diff()\n",
    "    .dt.total_seconds()\n",
    "    .div(60)\n",
    ")\n",
    "test_data['diff_lat'] = test_data.groupby('loadingOrder')['latitude'].diff()\n",
    "# NOTE(review): the longitude delta is not wrapped, so a track crossing the antimeridian yields a value near +/-360.\n",
    "test_data['diff_lon'] = test_data.groupby('loadingOrder')['longitude'].diff()\n",
    "\n",
    "# The first record of each order has no predecessor, so its deltas are NaN and become 0 here.\n",
    "# NOTE(review): this fills NaN in every column of test_data, not only in the three delta columns - confirm intent.\n",
    "test_data.fillna(value=0, inplace=True)\n",
    "\n",
    "# Records that arrived more than 2000 minutes (about 33 hours) after the previous one.\n",
    "test_data.loc[test_data['diff_time'].gt(2000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>carrierName</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>onboardDate</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>temp_timestamp</th>\n",
       "      <th>diff_time</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20112</th>\n",
       "      <td>IK608316304754</td>\n",
       "      <td>2019-09-11 17:17:38+00:00</td>\n",
       "      <td>-179.197240</td>\n",
       "      <td>47.813207</td>\n",
       "      <td>35.0</td>\n",
       "      <td>9130.0</td>\n",
       "      <td>OYSCFP</td>\n",
       "      <td>Z7715601678</td>\n",
       "      <td>2019-08-31 21:57:00</td>\n",
       "      <td>CNYTN-PAONX</td>\n",
       "      <td>2019-09-11T17:17:38.000Z</td>\n",
       "      <td>692.5</td>\n",
       "      <td>0.035985</td>\n",
       "      <td>-354.547798</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53856</th>\n",
       "      <td>YV342585220795</td>\n",
       "      <td>2019-04-13 02:54:48+00:00</td>\n",
       "      <td>137.412612</td>\n",
       "      <td>39.816323</td>\n",
       "      <td>32.0</td>\n",
       "      <td>5680.0</td>\n",
       "      <td>OIEQNT</td>\n",
       "      <td>D7828192902</td>\n",
       "      <td>2019-04-01 18:32:00</td>\n",
       "      <td>CNYTN-MXZLO</td>\n",
       "      <td>2019-04-13T02:54:48.000Z</td>\n",
       "      <td>2419.5</td>\n",
       "      <td>8.361436</td>\n",
       "      <td>12.015105</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         loadingOrder                 timestamp   longitude   latitude  speed  \\\n",
       "20112  IK608316304754 2019-09-11 17:17:38+00:00 -179.197240  47.813207   35.0   \n",
       "53856  YV342585220795 2019-04-13 02:54:48+00:00  137.412612  39.816323   32.0   \n",
       "\n",
       "       direction carrierName   vesselMMSI         onboardDate TRANSPORT_TRACE  \\\n",
       "20112     9130.0      OYSCFP  Z7715601678 2019-08-31 21:57:00     CNYTN-PAONX   \n",
       "53856     5680.0      OIEQNT  D7828192902 2019-04-01 18:32:00     CNYTN-MXZLO   \n",
       "\n",
       "                 temp_timestamp  diff_time  diff_lat    diff_lon  \n",
       "20112  2019-09-11T17:17:38.000Z      692.5  0.035985 -354.547798  \n",
       "53856  2019-04-13T02:54:48.000Z     2419.5  8.361436   12.015105  "
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the rows whose combined absolute step in diff_lon and diff_lat exceeds 20.\n",
    "# NOTE(review): diff_lon does not appear to be wrapped at +/-180, so a date-line\n",
    "# crossing (row 20112 in the saved output: longitude -179.2, diff_lon -354.5) is\n",
    "# listed here as well -- confirm whether that is intended.\n",
    "test_data.loc[(test_data['diff_lon'].abs() + test_data['diff_lat'].abs()).gt(20)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loadingOrder</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>speed</th>\n",
       "      <th>direction</th>\n",
       "      <th>carrierName</th>\n",
       "      <th>vesselMMSI</th>\n",
       "      <th>onboardDate</th>\n",
       "      <th>TRANSPORT_TRACE</th>\n",
       "      <th>temp_timestamp</th>\n",
       "      <th>diff_time</th>\n",
       "      <th>diff_lat</th>\n",
       "      <th>diff_lon</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25454</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 02:09:33+00:00</td>\n",
       "      <td>14.534182</td>\n",
       "      <td>35.821510</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30270.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T02:09:33.000Z</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25455</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 02:09:36+00:00</td>\n",
       "      <td>14.534182</td>\n",
       "      <td>35.821510</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30300.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T02:09:36.000Z</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25456</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 02:15:33+00:00</td>\n",
       "      <td>14.534182</td>\n",
       "      <td>35.821505</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30190.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T02:15:33.000Z</td>\n",
       "      <td>5.950000</td>\n",
       "      <td>-0.000005</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25457</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 02:21:33+00:00</td>\n",
       "      <td>14.534195</td>\n",
       "      <td>35.821523</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30450.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T02:21:33.000Z</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>0.000013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25458</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 02:24:34+00:00</td>\n",
       "      <td>14.534175</td>\n",
       "      <td>35.821523</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30450.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T02:24:34.000Z</td>\n",
       "      <td>3.016667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.000020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25774</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 22:48:29+00:00</td>\n",
       "      <td>11.958382</td>\n",
       "      <td>37.062257</td>\n",
       "      <td>26.0</td>\n",
       "      <td>30200.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T22:48:29.000Z</td>\n",
       "      <td>8.133333</td>\n",
       "      <td>0.017529</td>\n",
       "      <td>-0.034771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25775</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 22:55:27+00:00</td>\n",
       "      <td>11.930242</td>\n",
       "      <td>37.076688</td>\n",
       "      <td>25.0</td>\n",
       "      <td>30400.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T22:55:27.000Z</td>\n",
       "      <td>6.966667</td>\n",
       "      <td>0.014431</td>\n",
       "      <td>-0.028140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25776</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 23:28:03+00:00</td>\n",
       "      <td>11.796572</td>\n",
       "      <td>37.147553</td>\n",
       "      <td>26.0</td>\n",
       "      <td>30300.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T23:28:03.000Z</td>\n",
       "      <td>32.600000</td>\n",
       "      <td>0.070865</td>\n",
       "      <td>-0.133670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25777</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 23:43:51+00:00</td>\n",
       "      <td>11.730447</td>\n",
       "      <td>37.182505</td>\n",
       "      <td>27.0</td>\n",
       "      <td>30400.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T23:43:51.000Z</td>\n",
       "      <td>15.800000</td>\n",
       "      <td>0.034952</td>\n",
       "      <td>-0.066125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25778</th>\n",
       "      <td>LM121664928482</td>\n",
       "      <td>2020-06-02 23:59:14+00:00</td>\n",
       "      <td>11.664830</td>\n",
       "      <td>37.218727</td>\n",
       "      <td>27.0</td>\n",
       "      <td>30400.0</td>\n",
       "      <td>RWHZVZ</td>\n",
       "      <td>A2177695011</td>\n",
       "      <td>2020-06-02 02:09:33</td>\n",
       "      <td>CNSHK-SGSIN-MTMLA-DZALG</td>\n",
       "      <td>2020-06-02T23:59:14.000Z</td>\n",
       "      <td>15.383333</td>\n",
       "      <td>0.036222</td>\n",
       "      <td>-0.065617</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>325 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         loadingOrder                 timestamp  longitude   latitude  speed  \\\n",
       "25454  LM121664928482 2020-06-02 02:09:33+00:00  14.534182  35.821510    0.0   \n",
       "25455  LM121664928482 2020-06-02 02:09:36+00:00  14.534182  35.821510    0.0   \n",
       "25456  LM121664928482 2020-06-02 02:15:33+00:00  14.534182  35.821505    0.0   \n",
       "25457  LM121664928482 2020-06-02 02:21:33+00:00  14.534195  35.821523    0.0   \n",
       "25458  LM121664928482 2020-06-02 02:24:34+00:00  14.534175  35.821523    0.0   \n",
       "...               ...                       ...        ...        ...    ...   \n",
       "25774  LM121664928482 2020-06-02 22:48:29+00:00  11.958382  37.062257   26.0   \n",
       "25775  LM121664928482 2020-06-02 22:55:27+00:00  11.930242  37.076688   25.0   \n",
       "25776  LM121664928482 2020-06-02 23:28:03+00:00  11.796572  37.147553   26.0   \n",
       "25777  LM121664928482 2020-06-02 23:43:51+00:00  11.730447  37.182505   27.0   \n",
       "25778  LM121664928482 2020-06-02 23:59:14+00:00  11.664830  37.218727   27.0   \n",
       "\n",
       "       direction carrierName   vesselMMSI         onboardDate  \\\n",
       "25454    30270.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25455    30300.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25456    30190.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25457    30450.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25458    30450.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "...          ...         ...          ...                 ...   \n",
       "25774    30200.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25775    30400.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25776    30300.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25777    30400.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "25778    30400.0      RWHZVZ  A2177695011 2020-06-02 02:09:33   \n",
       "\n",
       "               TRANSPORT_TRACE            temp_timestamp  diff_time  diff_lat  \\\n",
       "25454  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T02:09:33.000Z   0.000000  0.000000   \n",
       "25455  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T02:09:36.000Z   0.050000  0.000000   \n",
       "25456  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T02:15:33.000Z   5.950000 -0.000005   \n",
       "25457  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T02:21:33.000Z   6.000000  0.000018   \n",
       "25458  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T02:24:34.000Z   3.016667  0.000000   \n",
       "...                        ...                       ...        ...       ...   \n",
       "25774  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T22:48:29.000Z   8.133333  0.017529   \n",
       "25775  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T22:55:27.000Z   6.966667  0.014431   \n",
       "25776  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T23:28:03.000Z  32.600000  0.070865   \n",
       "25777  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T23:43:51.000Z  15.800000  0.034952   \n",
       "25778  CNSHK-SGSIN-MTMLA-DZALG  2020-06-02T23:59:14.000Z  15.383333  0.036222   \n",
       "\n",
       "       diff_lon  \n",
       "25454  0.000000  \n",
       "25455  0.000000  \n",
       "25456  0.000000  \n",
       "25457  0.000013  \n",
       "25458 -0.000020  \n",
       "...         ...  \n",
       "25774 -0.034771  \n",
       "25775 -0.028140  \n",
       "25776 -0.133670  \n",
       "25777 -0.066125  \n",
       "25778 -0.065617  \n",
       "\n",
       "[325 rows x 14 columns]"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display every row whose TRANSPORT_TRACE is 'CNSHK-SGSIN-MTMLA-DZALG'.\n",
    "(\n",
    "    test_data\n",
    "    .groupby('TRANSPORT_TRACE')\n",
    "    .get_group('CNSHK-SGSIN-MTMLA-DZALG')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_label = np.array([346.51371528, 565.09092391, 562.05565   , 875.78627778,\n",
    "       577.34510851, 707.54059343, 574.64097222, 587.25064815,\n",
    "       854.09850694, 558.19251068, 486.17434524,   0.        ,\n",
    "       577.34510851, 332.24729167, 285.75469697, 677.7309375 ,\n",
    "       547.81715278, 606.05962963,   0.        , 713.67077991,\n",
    "       590.32833333, 565.16898727,   0.        , 590.32833333,\n",
    "       565.63923032, 565.63923032, 679.26095486, 679.26095486,\n",
    "       589.26789683, 679.26095486, 590.32833333, 577.34510851,\n",
    "       590.07095238, 588.98914683,   0.        , 577.34510851,\n",
    "       590.32833333, 588.9010119 , 590.32833333, 679.26095486,\n",
    "       679.26095486, 590.32833333, 565.96857639, 565.96857639,\n",
    "       549.49165741, 590.32833333, 589.11275794, 588.9010119 ,\n",
    "       577.34510851, 590.07095238, 529.21761111, 529.21761111,\n",
    "       590.32833333, 398.01960784,   0.        , 100.77880719,\n",
    "       577.34510851,  92.32464659, 577.34510851, 148.33700539,\n",
    "       577.34510851,  19.74736806, 577.34510851, 577.34510851,\n",
    "       101.        ,   0.        , 843.17305556,  85.7640424 ,\n",
    "       148.33700539, 148.33700539,  87.9519494 ,  87.74450231,\n",
    "       458.48430556, 129.04661537,  92.32464659,  89.04403747,\n",
    "       599.21581944,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ,\n",
    "         0.        ,   0.        ,   0.        ,   0.        ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1124"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run a full garbage-collection pass; the value displayed as this cell's output is what gc.collect() returned.\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
